diff options
Diffstat (limited to 'Src/h264dec')
103 files changed, 58239 insertions, 0 deletions
diff --git a/Src/h264dec/dec_api.c b/Src/h264dec/dec_api.c new file mode 100644 index 00000000..aaaeb46a --- /dev/null +++ b/Src/h264dec/dec_api.c @@ -0,0 +1,393 @@ +#include "dec_api.h" +#include "global.h" +#include "nalu.h" +#include "image.h" +#include "meminput.h" +#include "output.h" +#include "fmo.h" +#include "erc_api.h" +#include "parset.h" +#include "memcache.h" +#include "block.h" +#include "optim.h" +#include "mc_prediction.h" +#include "vlc.h" +#include <stddef.h> // for offsetof + +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +OptimizedFunctions opt; + +DecoderParams *alloc_decoder(); +void Configure(VideoParameters *p_Vid, InputParameters *p_Inp); +void malloc_slice(InputParameters *p_Inp, VideoParameters *p_Vid); +void init (VideoParameters *p_Vid); +void free_slice (Slice *currSlice); +void free_img( VideoParameters *p_Vid); + +int sse2_flag = 0, mmx_flag=0, sse_flag=0, sse3_flag=0, sse4_1_flag=0; +int H264_Init() +{ + int flags_edx, flags_ecx; + #ifdef H264_IPP + ippStaticInit(); + #endif + +#ifdef _M_IX86 + _asm { + mov eax, 1 + cpuid + mov flags_edx, edx + mov flags_ecx, ecx + } + mmx_flag = flags_edx & 0x00800000; + sse_flag = flags_edx & 0x02000000; + sse2_flag = flags_edx & 0x04000000; + sse3_flag = flags_ecx & 0x00000001; + sse4_1_flag= flags_ecx & (1 << 19); + +#elif defined(_M_X64) + sse2_flag = 1; +#endif + +#ifdef _M_IX86 + /* if you get any compile errors here, you need to change biari.asm */ + BUILD_BUG_ON(offsetof(TextureInfoContexts, map_contexts) != 436); + BUILD_BUG_ON(offsetof(TextureInfoContexts, last_contexts) != 3252); + BUILD_BUG_ON(offsetof(TextureInfoContexts, one_contexts) != 6068); + BUILD_BUG_ON(offsetof(TextureInfoContexts, abs_contexts) != 6508); + + BUILD_BUG_ON(offsetof(Macroblock, p_Slice) != 0); + BUILD_BUG_ON(offsetof(Macroblock, p_Vid) != 4); + BUILD_BUG_ON(offsetof(Macroblock, qp) != 60); + BUILD_BUG_ON(offsetof(Macroblock, qpc) != 64); + BUILD_BUG_ON(offsetof(Macroblock, qp_scaled) 
!= 72); + BUILD_BUG_ON(offsetof(Macroblock, cbp_blk) != 248); + BUILD_BUG_ON(offsetof(Macroblock, mb_field) != 344); + BUILD_BUG_ON(offsetof(Macroblock, read_and_store_CBP_block_bit) != 400); + + BUILD_BUG_ON(offsetof(Slice, tex_ctx) != 100); + BUILD_BUG_ON(offsetof(Slice, mb_rec) != 1696); + BUILD_BUG_ON(offsetof(Slice, mb_pred) != 928); + BUILD_BUG_ON(offsetof(Slice, coeff) != 15632); + BUILD_BUG_ON(offsetof(Slice, coeff_ctr) != 15760); + BUILD_BUG_ON(offsetof(Slice, pos) != 15764); + BUILD_BUG_ON(offsetof(Slice, cof) != 2464); + BUILD_BUG_ON(offsetof(Slice, last_dquant) != 88); + BUILD_BUG_ON(offsetof(Slice, mot_ctx) != 96); + BUILD_BUG_ON(offsetof(Slice, slice_type) != 64); + + + BUILD_BUG_ON(offsetof(StorablePicture, structure) != 0); + BUILD_BUG_ON(offsetof(StorablePicture, chroma_qp_offset) != 158688); + BUILD_BUG_ON(offsetof(StorablePicture, motion) != 158524); + BUILD_BUG_ON(offsetof(StorablePicture, plane_images) != 158512); + BUILD_BUG_ON(offsetof(StorablePicture, imgY) != 158512); + + + BUILD_BUG_ON(offsetof(VideoParameters, structure) != 697200); + BUILD_BUG_ON(offsetof(VideoParameters, bitdepth_chroma_qp_scale) != 697456); + BUILD_BUG_ON(offsetof(VideoParameters, dec_picture) != 698192); + + BUILD_BUG_ON(offsetof(DecodingEnvironment, Dcodestrm_len) != 16); + BUILD_BUG_ON(offsetof(DecodingEnvironment, Dcodestrm) != 12); + BUILD_BUG_ON(offsetof(DecodingEnvironment, DbitsLeft) != 8); + BUILD_BUG_ON(offsetof(DecodingEnvironment, Dvalue) != 4); + BUILD_BUG_ON(offsetof(DecodingEnvironment, Drange) != 0); + + BUILD_BUG_ON(sizeof(BiContextType) != 4); + BUILD_BUG_ON(offsetof(BiContextType, state) != 0); + BUILD_BUG_ON(offsetof(BiContextType, MPS) != 2); + + BUILD_BUG_ON(offsetof(OptimizedFunctions, copy_image_data_16x16_stride) != 32); +#endif + + if (sse2_flag) + { + //opt.itrans4x4 = itrans4x4_mmx; + opt.itrans8x8 = itrans8x8_sse2; + opt.weighted_mc_prediction16x16 = weighted_mc_prediction16x16_sse2; + opt.weighted_mc_prediction16x8 = 
weighted_mc_prediction16x8_sse2; + opt.weighted_mc_prediction8x8 = weighted_mc_prediction8x8_sse2; + + opt.weighted_bi_prediction16x16 = weighted_bi_prediction16x16_sse2; + opt.weighted_bi_prediction16x8 = weighted_bi_prediction16x8_sse2; + opt.weighted_bi_prediction8x8 = weighted_bi_prediction8x8_sse2; + + opt.bi_prediction8x8 = bi_prediction8x8_sse2; + opt.copy_image_data_16x16_stride = copy_image_data_16x16_stride_sse; + opt.code_from_bitstream_2d_5_4 = code_from_bitstream_2d_5_4_sse2; + opt.code_from_bitstream_2d_17_4 = code_from_bitstream_2d_17_4_sse2; + opt.code_from_bitstream_2d_16_1 = code_from_bitstream_2d_16_1_sse2; + } + else if (sse_flag && mmx_flag) + { + //opt.itrans4x4 = itrans4x4_mmx; + opt.itrans8x8 = itrans8x8_c;//itrans8x8_mmx; + + opt.weighted_mc_prediction16x16 = weighted_mc_prediction16x16_ipp; + opt.weighted_mc_prediction16x8 = weighted_mc_prediction16x8_ipp; + opt.weighted_mc_prediction8x8 = weighted_mc_prediction8x8_ipp; + + opt.weighted_bi_prediction16x16 = weighted_bi_prediction16x16_ipp; + opt.weighted_bi_prediction16x8 = weighted_bi_prediction16x8_ipp; + opt.weighted_bi_prediction8x8 = weighted_bi_prediction8x8_ipp; + + opt.bi_prediction8x8 = bi_prediction8x8_ipp; + opt.copy_image_data_16x16_stride = copy_image_data_16x16_stride_sse; + opt.code_from_bitstream_2d_5_4 = code_from_bitstream_2d_5_4_c; + opt.code_from_bitstream_2d_17_4 = code_from_bitstream_2d_17_4_c; + opt.code_from_bitstream_2d_16_1 = code_from_bitstream_2d_16_1_c; + } + else + return 0; + + return 1; +} + +h264_decoder_t H264_CreateDecoder() +{ + DecoderParams *decoder=alloc_decoder(); + + if (decoder) + { + InputParameters *p_Inp = decoder->p_Inp; + Configure(decoder->p_Vid, p_Inp); + p_Inp->intra_profile_deblocking = 1; + + initBitsFile(decoder->p_Vid); + + malloc_slice(decoder->p_Inp, decoder->p_Vid); + init_old_slice(decoder->p_Vid->old_slice); + + init(decoder->p_Vid); + + init_out_buffer(decoder->p_Vid); + + decoder->p_Vid->current_mb_nr = -4711; // initialized to 
an impossible value for debugging -- correct value is taken from slice header + + } + return decoder; +} + +void H264_DestroyDecoder(h264_decoder_t d) +{ + DecoderParams *decoder = (DecoderParams *)d; + if (decoder) + { + free_slice(decoder->p_Vid->currentSlice); + FmoFinit(decoder->p_Vid); + + free_global_buffers(decoder->p_Vid); + flush_dpb(decoder->p_Vid); + +#if (PAIR_FIELDS_IN_OUTPUT) + flush_pending_output(decoder->p_Vid); +#endif + + out_storable_pictures_destroy(decoder->p_Vid); + + ercClose(decoder->p_Vid, decoder->p_Vid->erc_errorVar); + + CleanUpPPS(decoder->p_Vid); + free_dpb(decoder->p_Vid); + uninit_out_buffer(decoder->p_Vid); + image_cache_flush(&decoder->p_Vid->image_cache[0]); + image_cache_flush(&decoder->p_Vid->image_cache[1]); + motion_cache_flush(&decoder->p_Vid->motion_cache); + FreeNALU(decoder->p_Vid->nalu); + free (decoder->p_Inp); + free_img (decoder->p_Vid); + free(decoder); + } +} + +void H264_DecodeFrame(h264_decoder_t d, const void *buffer, size_t bufferlen, uint64_t time_code) +{ + DecoderParams *decoder = (DecoderParams *)d; + int ret; + memory_input_t *mem_input = decoder->p_Vid->mem_input; + mem_input->user_buffer=buffer; + mem_input->user_buffer_size=bufferlen; + mem_input->user_buffer_read=0; + __try + { + ret = decode_one_frame(decoder->p_Vid, time_code); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + int x; + x=0; + } +#ifdef _M_IX86 + _mm_empty(); +#endif +} + +void H264_GetPicture(h264_decoder_t d, StorablePicture **pic) +{ + DecoderParams *decoder = (DecoderParams *)d; + if (pic) + { + out_storable_picture_get(decoder->p_Vid, pic); + } +} + +static double GetAspectRatio(const vui_seq_parameters_t *vui) +{ + int aspect_ratio_width=1, aspect_ratio_height=1; + + if (vui->aspect_ratio_info_present_flag) + { + switch(vui->aspect_ratio_idc) + { + case VUI_AR_UNDEFINED: + case VUI_AR_SQUARE: + aspect_ratio_width = 1; + aspect_ratio_height = 1; + break; + case VUI_AR_12_11: + aspect_ratio_width = 12; + aspect_ratio_height = 11; + 
break; + case VUI_AR_10_11: + aspect_ratio_width = 10; + aspect_ratio_height = 11; + break; + case VUI_AR_16_11: + aspect_ratio_width = 16; + aspect_ratio_height = 11; + break; + case VUI_AR_40_33: + aspect_ratio_width = 40; + aspect_ratio_height = 33; + break; + case VUI_AR_24_11: + aspect_ratio_width = 24; + aspect_ratio_height = 11; + break; + case VUI_AR_20_11: + aspect_ratio_width = 20; + aspect_ratio_height = 11; + break; + case VUI_AR_32_11: + aspect_ratio_width = 32; + aspect_ratio_height = 11; + break; + case VUI_AR_80_33: + aspect_ratio_width = 80; + aspect_ratio_height = 33; + break; + case VUI_AR_18_11: + aspect_ratio_width = 18; + aspect_ratio_height = 11; + break; + case VUI_AR_15_11: + aspect_ratio_width = 15; + aspect_ratio_height = 11; + break; + case VUI_AR_64_33: + aspect_ratio_width = 64; + aspect_ratio_height = 33; + break; + case VUI_AR_160_99: + aspect_ratio_width = 160; + aspect_ratio_height = 99; + break; + case VUI_AR_4_3: + aspect_ratio_width = 4; + aspect_ratio_height = 3; + break; + case VUI_AR_3_2: + aspect_ratio_width = 3; + aspect_ratio_height = 2; + break;; + case VUI_AR_2_1: + aspect_ratio_width = 2; + aspect_ratio_height = 1; + break;; + case VUI_EXTENDED_SAR: + default: + aspect_ratio_width = vui->sar_width; + aspect_ratio_height = vui->sar_height; + break; + } + } + return (double)aspect_ratio_width / (double)aspect_ratio_height; +} + +const FrameFormat *H264_GetOutputFormat(h264_decoder_t d, double *aspect_ratio) +{ + DecoderParams *decoder = (DecoderParams *)d; + if (decoder && decoder->p_Inp) + { + if (decoder->p_Vid->active_sps) + *aspect_ratio = GetAspectRatio(&decoder->p_Vid->active_sps->vui_seq_parameters); + + return &decoder->p_Inp->output; + } + else + return 0; +} + +void H264_Flush(h264_decoder_t d) +{ + DecoderParams *decoder = (DecoderParams *)d; + if (decoder && decoder->p_Vid) + { + StorablePicture *pic=0; + exit_picture(decoder->p_Vid, &decoder->p_Vid->dec_picture); + if (pic) + 
free_storable_picture(decoder->p_Vid, pic); + pic=0; + + decoder->p_Vid->frame_num = 0; + decoder->p_Vid->pre_frame_num = INT_MIN; + decoder->p_Vid->PreviousFrameNum=0; + decoder->p_Vid->PreviousFrameNumOffset = 0; + decoder->p_Vid->PrevPicOrderCntLsb = 0; + decoder->p_Vid->PrevPicOrderCntMsb = 0; + flush_dpb(decoder->p_Vid); + + do + { + pic=0; + out_storable_picture_get(decoder->p_Vid, &pic); + if (pic) + free_storable_picture(decoder->p_Vid, pic); + } while (pic); + decoder->p_Vid->mem_input->resetting = 1; + } +} + +void H264_FreePicture(h264_decoder_t d, StorablePicture *p) +{ + DecoderParams *decoder = (DecoderParams *)d; + if (decoder && decoder->p_Vid && p) + { + free_storable_picture(decoder->p_Vid, p); + } +} + +void H264_EndOfStream(h264_decoder_t d) +{ + DecoderParams *decoder = (DecoderParams *)d; + if (decoder && decoder->p_Vid) + { + if (decoder->p_Vid->dec_picture) + exit_picture(decoder->p_Vid, &decoder->p_Vid->dec_picture); + else + flush_dpb(decoder->p_Vid); + } +} + +void H264_HurryUp(h264_decoder_t d, int state) +{ + DecoderParams *decoder = (DecoderParams *)d; + if (decoder && decoder->p_Vid) + { + memory_input_t *mem_input = decoder->p_Vid->mem_input; + if (mem_input) + mem_input->skip_b_frames = state; + } +}
\ No newline at end of file diff --git a/Src/h264dec/dec_api.h b/Src/h264dec/dec_api.h new file mode 100644 index 00000000..41811666 --- /dev/null +++ b/Src/h264dec/dec_api.h @@ -0,0 +1,25 @@ +#pragma once +#include <bfc/platform/types.h> +#ifdef __cplusplus +extern "C" { +#endif + #include "ldecod/inc/mbuffer.h" +#include "lcommon/inc/frame.h" + +typedef void *h264_decoder_t; + +int H264_Init(); // initializes the library. currently just does a CPU feature check (sse2, etc) +h264_decoder_t H264_CreateDecoder(); +void H264_DestroyDecoder(h264_decoder_t decoder); + +void H264_DecodeFrame(h264_decoder_t decoder, const void *buffer, size_t bufferlen, uint64_t time_code); +void H264_GetPicture(h264_decoder_t decoder, StorablePicture **pic); +void H264_FreePicture(h264_decoder_t decoder, StorablePicture *pic); +void H264_Flush(h264_decoder_t decoder); +void H264_EndOfStream(h264_decoder_t decoder); +void H264_HurryUp(h264_decoder_t decoder, int state); +const FrameFormat *H264_GetOutputFormat(h264_decoder_t decoder, double *aspect_ratio); + +#ifdef __cplusplus +} +#endif
\ No newline at end of file diff --git a/Src/h264dec/jm_vc9.sln b/Src/h264dec/jm_vc9.sln new file mode 100644 index 00000000..9d057c83 --- /dev/null +++ b/Src/h264dec/jm_vc9.sln @@ -0,0 +1,19 @@ +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ldecod", "ldecod_vc9.vcproj", "{5499B067-CF32-4141-A757-E0A29866994A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5499B067-CF32-4141-A757-E0A29866994A}.Debug|Win32.ActiveCfg = Debug|Win32 + {5499B067-CF32-4141-A757-E0A29866994A}.Debug|Win32.Build.0 = Debug|Win32 + {5499B067-CF32-4141-A757-E0A29866994A}.Release|Win32.ActiveCfg = Release|Win32 + {5499B067-CF32-4141-A757-E0A29866994A}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Src/h264dec/lcommon/inc/ctx_tables.h b/Src/h264dec/lcommon/inc/ctx_tables.h new file mode 100644 index 00000000..28d622f3 --- /dev/null +++ b/Src/h264dec/lcommon/inc/ctx_tables.h @@ -0,0 +1,994 @@ + +/*! 
+ ************************************************************************************* + * \file ctx_tables.h + * + * \brief + * CABAC context initialization tables + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe <marpe@hhi.de> + * - Heiko Schwarz <hschwarz@hhi.de> + ************************************************************************************** + */ + +#define CTX_UNUSED {0,64} +#define CTX_UNDEF {0,63} + +#ifdef CONTEXT_INI_C + + +#define NUM_CTX_MODELS_I 1 +#define NUM_CTX_MODELS_P 3 + + +static const char INIT_MB_TYPE_I[1][3][11][2] = +{ + //----- model 0 ----- + { + { { 20, -15} , { 2, 54} , { 3, 74} , CTX_UNUSED , { -28, 127} , { -23, 104} , { -6, 53} , { -1, 54} , { 7, 51} , CTX_UNUSED , CTX_UNUSED }, + { { 20, -15} , { 2, 54} , { 3, 74} , { 20, -15} , { 2, 54} , { 3, 74} , { -28, 127} , { -23, 104} , { -6, 53} , { -1, 54} , { 7, 51} }, // SI (unused at the moment) + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; +static const char INIT_MB_TYPE_P[3][3][11][2] = +{ + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 23, 33} , { 23, 2} , { 21, 0} , CTX_UNUSED , { 1, 9} , { 0, 49} , { -37, 118} , { 5, 57} , { -13, 78} , { -11, 65} , { 1, 62} }, + { { 26, 67} , { 16, 90} , { 9, 104} , CTX_UNUSED , { -46, 127} , { -20, 104} , { 1, 67} , { 18, 64} , { 9, 43} , { 29, 0} , CTX_UNUSED } + }, + //----- model 1 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 22, 25} , { 34, 0} , { 16, 0} , CTX_UNUSED , { -2, 9} , { 4, 41} , { -29, 118} , { 2, 65} , { -6, 71} , { -13, 79} , { 5, 52} }, + { { 57, 2} , { 41, 36} , { 26, 69} , CTX_UNUSED , { 
-45, 127} , { -15, 101} , { -4, 76} , { 26, 34} , { 19, 22} , { 40, 0} , CTX_UNUSED } + }, + //----- model 2 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 29, 16} , { 25, 0} , { 14, 0} , CTX_UNUSED , { -10, 51} , { -3, 62} , { -27, 99} , { 26, 16} , { -4, 85} , { -24, 102} , { 5, 57} }, + { { 54, 0} , { 37, 42} , { 12, 97} , CTX_UNUSED , { -32, 127} , { -22, 117} , { -2, 74} , { 20, 40} , { 20, 10} , { 29, 0} , CTX_UNUSED } + } +}; + +static const char INIT_B8_TYPE_I[1][2][9][2] = +{ + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_B8_TYPE_P[3][2][9][2] = +{ + //----- model 0 ----- + { + { CTX_UNUSED , { 12, 49} , CTX_UNUSED , { -4, 73} , { 17, 50} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 86} , { -17, 95} , { -6, 61} , { 9, 45} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { CTX_UNUSED , { 9, 50} , CTX_UNUSED , { -3, 70} , { 10, 54} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 6, 69} , { -13, 90} , { 0, 52} , { 8, 43} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { CTX_UNUSED , { 6, 57} , CTX_UNUSED , { -17, 73} , { 14, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 93} , { -14, 88} , { -6, 44} , { 4, 55} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_MV_RES_I[1][2][10][2] = +{ + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_MV_RES_P[3][2][10][2] = +{ + //----- model 0 ----- + { + { { -3, 69} , CTX_UNUSED , { -6, 81} , { -11, 96} , CTX_UNUSED , { 0, 58} , CTX_UNUSED , { -3, 76} , { -10, 94} , CTX_UNUSED }, + { { 6, 55} , { 7, 67} , { -5, 86} , { 2, 88} , CTX_UNUSED , { 5, 54} , { 4, 69} , { -3, 81} , { 0, 88} , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { -2, 69} , CTX_UNUSED , { -5, 82} , { -10, 96} , CTX_UNUSED , { 1, 56} , CTX_UNUSED , { -3, 74} , { -6, 85} , CTX_UNUSED }, + { { 2, 59} , { 2, 75} , { -3, 87} , { -3, 100} , CTX_UNUSED , { 0, 59} , { -3, 81} , { -7, 86} , { -5, 95} , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { -11, 89} , CTX_UNUSED , { -15, 103} , { -21, 116} , CTX_UNUSED , { 1, 63} , CTX_UNUSED , { -5, 85} , { -13, 106} , CTX_UNUSED }, + { { 19, 57} , { 20, 58} , { 4, 84} , { 6, 96} , CTX_UNUSED , { 5, 63} , { 6, 75} , { -3, 90} , { -1, 101} , CTX_UNUSED } + } +}; + +static const char INIT_REF_NO_I[1][2][6][2] = +{ + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_REF_NO_P[3][2][6][2] = +{ + //----- model 0 ----- + { + { { -7, 67} , { -5, 74} , { -4, 74} , { -5, 80} , { -7, 72} , { 1, 58} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { -1, 66} , { -1, 77} , { 1, 70} , { -2, 86} , { -5, 72} , { 0, 61} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { 3, 55} , { -4, 79} , { -2, 75} , { -12, 97} , { -7, 50} , { 1, 60} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + + +static const char INIT_TRANSFORM_SIZE_I[1][1][3][2]= +{ + //----- model 0 ----- + { + 
{ { 31, 21} , { 31, 31} , { 25, 50} }, +// { { 0, 41} , { 0, 63} , { 0, 63} }, + } +}; + +static const char INIT_TRANSFORM_SIZE_P[3][1][3][2]= +{ + //----- model 0 ----- + { + { { 12, 40} , { 11, 51} , { 14, 59} }, +// { { 0, 41} , { 0, 63} , { 0, 63} }, + }, + //----- model 1 ----- + { + { { 25, 32} , { 21, 49} , { 21, 54} }, +// { { 0, 41} , { 0, 63} , { 0, 63} }, + }, + //----- model 2 ----- + { + { { 21, 33} , { 19, 50} , { 17, 61} }, +// { { 0, 41} , { 0, 63} , { 0, 63} }, + } +}; + +static const char INIT_DELTA_QP_I[1][1][4][2]= +{ + //----- model 0 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + } +}; +static const char INIT_DELTA_QP_P[3][1][4][2]= +{ + //----- model 0 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + }, + //----- model 1 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + }, + //----- model 2 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + } +}; + +static const char INIT_MB_AFF_I[1][1][4][2] = +{ + //----- model 0 ----- + { + { { 0, 11} , { 1, 55} , { 0, 69} , CTX_UNUSED } + } +}; +static const char INIT_MB_AFF_P[3][1][4][2] = +{ + //----- model 0 ----- + { + { { 0, 45} , { -4, 78} , { -3, 96} , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { 13, 15} , { 7, 51} , { 2, 80} , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { 7, 34} , { -9, 88} , { -20, 127} , CTX_UNUSED } + } +}; + +static const char INIT_IPR_I[1][1][2][2] = +{ + //----- model 0 ----- + { + { { 13, 41} , { 3, 62} } + } +}; + +static const char INIT_IPR_P[3][1][2][2] = +{ + //----- model 0 ----- + { + { { 13, 41} , { 3, 62} } + }, + //----- model 1 ----- + { + { { 13, 41} , { 3, 62} } + }, + //----- model 2 ----- + { + { { 13, 41} , { 3, 62} } + } +}; + +static const char INIT_CIPR_I[1][1][4][2] = +{ + //----- model 0 ----- + { + { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} } + } +}; + +static const char INIT_CIPR_P[3][1][4][2] = +{ + //----- model 0 ----- + { + { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} } + }, + //----- 
model 1 ----- + { + { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} } + }, + //----- model 2 ----- + { + { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} } + } +}; + +static const char INIT_CBP_I[1][3][4][2] = +{ + //----- model 0 ----- + { + { { -17, 127} , { -13, 102} , { 0, 82} , { -7, 74} }, + { { -21, 107} , { -27, 127} , { -31, 127} , { -24, 127} }, + { { -18, 95} , { -27, 127} , { -21, 114} , { -30, 127} } + } +}; + +static const char INIT_CBP_P[3][3][4][2] = +{ + //----- model 0 ----- + { + { { -27, 126} , { -28, 98} , { -25, 101} , { -23, 67} }, + { { -28, 82} , { -20, 94} , { -16, 83} , { -22, 110} }, + { { -21, 91} , { -18, 102} , { -13, 93} , { -29, 127} } + }, + //----- model 1 ----- + { + { { -39, 127} , { -18, 91} , { -17, 96} , { -26, 81} }, + { { -35, 98} , { -24, 102} , { -23, 97} , { -27, 119} }, + { { -24, 99} , { -21, 110} , { -18, 102} , { -36, 127} } + }, + //----- model 2 ----- + { + { { -36, 127} , { -17, 91} , { -14, 95} , { -25, 84} }, + { { -25, 86} , { -12, 89} , { -17, 91} , { -31, 127} }, + { { -14, 76} , { -18, 103} , { -13, 90} , { -37, 127} } + } +}; + +static const char INIT_BCBP_I[1][22][4][2] = +{ + //----- model 0 ----- + { + { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} }, + { { -12, 63} , { -2, 68} , { -15, 84} , { -13, 104} }, + { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} }, + { { -1, 74} , { -6, 97} , { -7, 91} , { -20, 127} }, + { { -4, 56} , { -5, 82} , { -7, 76} , { -22, 125} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cb in the 4:4:4 common mode + { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} }, + { { -12, 63} , { -2, 68} , { -15, 84} , { -13, 104} }, + { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED }, + { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cr in the 4:4:4 common mode + { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} }, + { { -12, 63} , { -2, 68} , { -15, 84} , { -13, 104} }, + { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_BCBP_P[3][22][4][2] = +{ + //----- model 0 ----- + { + { { -7, 92} , { -5, 89} , { -7, 96} , { -13, 108} }, + { { -3, 46} , { -1, 65} , { -1, 57} , { -9, 93} }, + { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} }, + { { 5, 54} , { 6, 60} , { 6, 59} , { 6, 69} }, + { { -1, 48} , { 0, 68} , { -4, 69} , { -8, 88} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cb in the 4:4:4 common mode + { { -7, 92} , { -5, 89} , { -7, 96} , { -13, 108} }, + { { -3, 46} , { -1, 65} , { -1, 57} , { -9, 93} }, + { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cr in the 4:4:4 common mode + { { -7, 92} , { -5, 89} , { -7, 96} , { -13, 108} }, + { { -3, 46} , { -1, 65} , { -1, 57} , { -9, 93} }, + { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { 0, 80} , { -5, 89} , { -7, 94} , { -4, 92} }, + { { 0, 39} , { 0, 65} , { -15, 84} , { -35, 
127} }, + { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} }, + { { 3, 55} , { 7, 56} , { 7, 55} , { 8, 61} }, + { { -3, 53} , { 0, 68} , { -7, 74} , { -9, 88} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cb in the 4:4:4 common mode + { { 0, 80} , { -5, 89} , { -7, 94} , { -4, 92} }, + { { 0, 39} , { 0, 65} , { -15, 84} , { -35, 127} }, + { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cr in the 4:4:4 common mode + { { 0, 80} , { -5, 89} , { -7, 94} , { -4, 92} }, + { { 0, 39} , { 0, 65} , { -15, 84} , { -35, 127} }, + { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { 11, 80} , { 5, 76} , { 2, 84} , { 5, 78} }, + { { -6, 55} , { 4, 61} , { -14, 83} , { -37, 127} }, + { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} }, + { { 0, 65} , { -2, 79} , { 0, 72} , { -4, 92} }, + { { -6, 56} , { 3, 68} , { -8, 71} , { -13, 98} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cb in the 4:4:4 common mode + { { 11, 80} , { 5, 76} , { 2, 84} , { 5, 78} }, + { { -6, 55} , { 4, 61} , { -14, 83} , { -37, 127} }, + { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED }, + { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // Cr in the 4:4:4 common mode + { { 11, 80} , { 5, 76} , { 2, 84} , { 5, 78} }, + { { -6, 55} , { 4, 61} , { -14, 83} , { -37, 127} }, + { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_MAP_I[1][22][15][2] = +{ + //----- model 0 ----- + { + { { -7, 93} , { -11, 87} , { -3, 77} , { -5, 71} , { -4, 63} , { -4, 68} , { -12, 84} , { -7, 62} , { -7, 65} , { 8, 61} , { 5, 56} , { -2, 66} , { 1, 64} , { 0, 61} , { -2, 78} }, + { CTX_UNUSED , { 1, 50} , { 7, 52} , { 10, 35} , { 0, 44} , { 11, 38} , { 1, 45} , { 0, 46} , { 5, 44} , { 31, 17} , { 1, 51} , { 7, 50} , { 28, 19} , { 16, 33} , { 14, 62} }, + { { -17, 120} , { -20, 112} , { -18, 114} , { -11, 85} , { -15, 92} , { -14, 89} , { -26, 71} , { -15, 81} , { -14, 80} , { 0, 68} , { -14, 70} , { -24, 56} , { -23, 68} , { -24, 50} , { -11, 74} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -13, 108} , { -15, 100} , { -13, 101} , { -13, 91} , { -12, 94} , { -10, 88} , { -16, 84} , { -10, 86} , { -7, 83} , { -13, 87} , { -19, 94} , { 1, 70} , { 0, 72} , { -5, 74} , { 18, 59} }, + { { -8, 102} , { -15, 100} , { 0, 95} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { -4, 75} , { 2, 72} , { -11, 
75} , { -3, 71} , { 15, 46} , { -13, 69} , { 0, 62} , { 0, 65} , { 21, 37} , { -15, 72} , { 9, 57} , { 16, 54} , { 0, 62} , { 12, 72} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -7, 93} , { -11, 87} , { -3, 77} , { -5, 71} , { -4, 63} , { -4, 68} , { -12, 84} , { -7, 62} , { -7, 65} , { 8, 61} , { 5, 56} , { -2, 66} , { 1, 64} , { 0, 61} , { -2, 78} }, + { CTX_UNUSED , { 1, 50} , { 7, 52} , { 10, 35} , { 0, 44} , { 11, 38} , { 1, 45} , { 0, 46} , { 5, 44} , { 31, 17} , { 1, 51} , { 7, 50} , { 28, 19} , { 16, 33} , { 14, 62} }, + { { -17, 120} , { -20, 112} , { -18, 114} , { -11, 85} , { -15, 92} , { -14, 89} , { -26, 71} , { -15, 81} , { -14, 80} , { 0, 68} , { -14, 70} , { -24, 56} , { -23, 68} , { -24, 50} , { -11, 74} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -13, 108} , { -15, 100} , { -13, 101} , { -13, 91} , { -12, 94} , { -10, 88} , { -16, 84} , { -10, 86} , { -7, 83} , { -13, 87} , { -19, 94} , { 1, 70} , { 0, 72} , { -5, 74} , { 18, 59} }, + //Cr in the 4:4:4 common mode + { { -7, 93} , { -11, 87} , { -3, 77} , { -5, 71} , { -4, 63} , { -4, 68} , { -12, 84} , { -7, 62} , { -7, 65} , { 8, 61} , { 5, 56} , { -2, 66} , { 1, 64} , { 0, 61} , { -2, 78} }, + { CTX_UNUSED , { 1, 50} , { 7, 52} , { 10, 35} , { 0, 
44} , { 11, 38} , { 1, 45} , { 0, 46} , { 5, 44} , { 31, 17} , { 1, 51} , { 7, 50} , { 28, 19} , { 16, 33} , { 14, 62} }, + { { -17, 120} , { -20, 112} , { -18, 114} , { -11, 85} , { -15, 92} , { -14, 89} , { -26, 71} , { -15, 81} , { -14, 80} , { 0, 68} , { -14, 70} , { -24, 56} , { -23, 68} , { -24, 50} , { -11, 74} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -13, 108} , { -15, 100} , { -13, 101} , { -13, 91} , { -12, 94} , { -10, 88} , { -16, 84} , { -10, 86} , { -7, 83} , { -13, 87} , { -19, 94} , { 1, 70} , { 0, 72} , { -5, 74} , { 18, 59} } + } +}; + +static const char INIT_MAP_P[3][22][15][2] = +{ + //----- model 0 ----- + { + { { -2, 85} , { -6, 78} , { -1, 75} , { -7, 77} , { 2, 54} , { 5, 50} , { -3, 68} , { 1, 50} , { 6, 42} , { -4, 81} , { 1, 63} , { -4, 70} , { 0, 67} , { 2, 57} , { -2, 76} }, + { CTX_UNUSED , { 11, 35} , { 4, 64} , { 1, 61} , { 11, 35} , { 18, 25} , { 12, 24} , { 13, 29} , { 13, 36} , { -10, 93} , { -7, 73} , { -2, 73} , { 13, 46} , { 9, 49} , { -7, 100} }, + { { -4, 79} , { -7, 71} , { -5, 69} , { -9, 70} , { -8, 66} , { -10, 68} , { -19, 73} , { -12, 69} , { -16, 70} , { -15, 67} , { -20, 62} , { -19, 70} , { -16, 66} , { -22, 65} , { -20, 63} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 9, 53} , { 2, 53} , { 5, 53} , 
{ -2, 61} , { 0, 56} , { 0, 56} , { -13, 63} , { -5, 60} , { -1, 62} , { 4, 57} , { -6, 69} , { 4, 57} , { 14, 39} , { 4, 51} , { 13, 68} }, + { { 3, 64} , { 1, 61} , { 9, 63} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 7, 50} , { 16, 39} , { 5, 44} , { 4, 52} , { 11, 48} , { -5, 60} , { -1, 59} , { 0, 59} , { 22, 33} , { 5, 44} , { 14, 43} , { -1, 78} , { 0, 60} , { 9, 69} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -2, 85} , { -6, 78} , { -1, 75} , { -7, 77} , { 2, 54} , { 5, 50} , { -3, 68} , { 1, 50} , { 6, 42} , { -4, 81} , { 1, 63} , { -4, 70} , { 0, 67} , { 2, 57} , { -2, 76} }, + { CTX_UNUSED , { 11, 35} , { 4, 64} , { 1, 61} , { 11, 35} , { 18, 25} , { 12, 24} , { 13, 29} , { 13, 36} , { -10, 93} , { -7, 73} , { -2, 73} , { 13, 46} , { 9, 49} , { -7, 100} }, + { { -4, 79} , { -7, 71} , { -5, 69} , { -9, 70} , { -8, 66} , { -10, 68} , { -19, 73} , { -12, 69} , { -16, 70} , { -15, 67} , { -20, 62} , { -19, 70} , { -16, 66} , { -22, 65} , { -20, 63} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 9, 53} , { 2, 53} , { 5, 53} , { -2, 61} , { 0, 56} , { 0, 56} , { -13, 63} , { -5, 60} , 
{ -1, 62} , { 4, 57} , { -6, 69} , { 4, 57} , { 14, 39} , { 4, 51} , { 13, 68} }, + //Cr in the 4:4:4 common mode + { { -2, 85} , { -6, 78} , { -1, 75} , { -7, 77} , { 2, 54} , { 5, 50} , { -3, 68} , { 1, 50} , { 6, 42} , { -4, 81} , { 1, 63} , { -4, 70} , { 0, 67} , { 2, 57} , { -2, 76} }, + { CTX_UNUSED , { 11, 35} , { 4, 64} , { 1, 61} , { 11, 35} , { 18, 25} , { 12, 24} , { 13, 29} , { 13, 36} , { -10, 93} , { -7, 73} , { -2, 73} , { 13, 46} , { 9, 49} , { -7, 100} }, + { { -4, 79} , { -7, 71} , { -5, 69} , { -9, 70} , { -8, 66} , { -10, 68} , { -19, 73} , { -12, 69} , { -16, 70} , { -15, 67} , { -20, 62} , { -19, 70} , { -16, 66} , { -22, 65} , { -20, 63} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 9, 53} , { 2, 53} , { 5, 53} , { -2, 61} , { 0, 56} , { 0, 56} , { -13, 63} , { -5, 60} , { -1, 62} , { 4, 57} , { -6, 69} , { 4, 57} , { 14, 39} , { 4, 51} , { 13, 68} } + }, + //----- model 1 ----- + { + { { -13, 103} , { -13, 91} , { -9, 89} , { -14, 92} , { -8, 76} , { -12, 87} , { -23, 110} , { -24, 105} , { -10, 78} , { -20, 112} , { -17, 99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} }, + { CTX_UNUSED , { -4, 66} , { -5, 78} , { -4, 71} , { -8, 72} , { 2, 59} , { -1, 55} , { -7, 70} , { -6, 75} , { -8, 89} , { -34, 119} , { -3, 75} , { 32, 20} , { 30, 22} , { -44, 127} }, + { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 0, 54} , { -5, 61} , { 0, 58} , { -1, 60} , { -3, 61} , { -8, 67} , { -25, 84} , { -14, 74} , { -5, 65} , { 5, 52} , { 2, 57} , { 0, 61} , { -9, 69} , { -11, 70} , { 18, 55} }, + { { -4, 71} , { 0, 58} , { 7, 61} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 9, 41} , { 18, 25} , { 9, 32} , { 5, 43} , { 9, 47} , { 0, 44} , { 0, 51} , { 2, 46} , { 19, 38} , { -4, 66} , { 15, 38} , { 12, 42} , { 9, 34} , { 0, 89} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -13, 103} , { -13, 91} , { -9, 89} , { -14, 92} , { -8, 76} , { -12, 87} , { -23, 110} , { -24, 105} , { -10, 78} , { -20, 112} , { -17, 99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} }, + { CTX_UNUSED , { -4, 66} , { -5, 78} , { -4, 71} , { -8, 72} , { 2, 59} , { -1, 55} , { -7, 70} , { -6, 75} , { -8, 89} , { -34, 119} , { -3, 75} , { 32, 20} , { 30, 22} , { -44, 127} }, + { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 0, 54} , { -5, 61} , { 0, 58} , { -1, 60} , { -3, 61} , { -8, 67} , { -25, 84} , { -14, 74} , { -5, 65} , { 5, 52} , { 2, 57} , { 0, 61} , { -9, 69} , { -11, 70} , { 18, 55} }, + //Cr in the 4:4:4 common mode + { { -13, 103} , { -13, 91} , { -9, 89} , { -14, 92} , { -8, 76} , { -12, 87} , { -23, 110} , { -24, 105} , { -10, 78} , { -20, 112} , { -17, 99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} }, + { CTX_UNUSED , { -4, 66} , { -5, 78} , { -4, 71} , { -8, 72} , { 2, 59} , { -1, 55} , { -7, 70} , { -6, 75} , { -8, 89} , { -34, 119} , { -3, 75} , { 32, 20} , { 30, 22} , { -44, 127} }, + { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 0, 54} , { -5, 61} , { 0, 58} , { -1, 60} , { -3, 61} , { -8, 67} , { -25, 84} , { -14, 74} , { -5, 65} , { 5, 52} , { 2, 57} , { 0, 61} , { -9, 69} , { -11, 70} , { 18, 55} } + }, + //----- model 2 ----- + { + { { -4, 86} , { -12, 88} , { -5, 82} , { -3, 72} , { -4, 67} , { -8, 72} , { -16, 89} , { -9, 69} , { -1, 59} , { 5, 66} , { 4, 57} , { -4, 71} , { -2, 71} , { 2, 58} , { -1, 74} }, + { CTX_UNUSED , { -4, 44} , { -1, 69} , { 0, 62} , { -7, 51} , { -4, 47} , { -6, 42} , { -3, 41} , { -6, 53} , { 8, 76} , { -9, 78} , { -11, 83} , { 9, 52} , { 
0, 67} , { -5, 90} }, + { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 67} , { -15, 72} , { -5, 75} , { -8, 80} , { -21, 83} , { -21, 64} , { -13, 31} , { -25, 64} , { -29, 94} , { 9, 75} , { 17, 63} , { -8, 74} , { -5, 35} , { -2, 27} , { 13, 91} }, + { { 3, 65} , { -7, 69} , { 8, 77} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { -10, 66} , { 3, 62} , { -3, 68} , { -20, 81} , { 0, 30} , { 1, 7} , { -3, 23} , { -21, 74} , { 16, 66} , { -23, 124} , { 17, 37} , { 44, -18} , { 50, -34} , { -22, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -4, 86} , { -12, 88} , { -5, 82} , { -3, 72} , { -4, 67} , { -8, 72} , { -16, 89} , { -9, 69} , { -1, 59} , { 5, 66} , { 4, 57} , { -4, 71} , { -2, 71} , { 2, 58} , { -1, 74} }, + { CTX_UNUSED , { -4, 44} , { -1, 69} , { 0, 62} , { -7, 51} , { -4, 47} , { -6, 42} , { -3, 41} , { -6, 53} , { 8, 76} , { -9, 78} , { -11, 83} , { 9, 52} , { 0, 67} , { -5, 90} }, + { { -3, 
78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 67} , { -15, 72} , { -5, 75} , { -8, 80} , { -21, 83} , { -21, 64} , { -13, 31} , { -25, 64} , { -29, 94} , { 9, 75} , { 17, 63} , { -8, 74} , { -5, 35} , { -2, 27} , { 13, 91} }, + //Cr in the 4:4:4 common mode + { { -4, 86} , { -12, 88} , { -5, 82} , { -3, 72} , { -4, 67} , { -8, 72} , { -16, 89} , { -9, 69} , { -1, 59} , { 5, 66} , { 4, 57} , { -4, 71} , { -2, 71} , { 2, 58} , { -1, 74} }, + { CTX_UNUSED , { -4, 44} , { -1, 69} , { 0, 62} , { -7, 51} , { -4, 47} , { -6, 42} , { -3, 41} , { -6, 53} , { 8, 76} , { -9, 78} , { -11, 83} , { 9, 52} , { 0, 67} , { -5, 90} }, + { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 67} , { -15, 72} , { -5, 75} , { -8, 80} , { -21, 83} , { -21, 64} , { -13, 31} , { -25, 64} , { -29, 94} , { 9, 75} , { 17, 63} , { -8, 74} , { -5, 35} , { -2, 27} , { 13, 91} } + } +}; + +static const char 
INIT_LAST_I[1][22][15][2] = +{ + //----- model 0 ----- + { + { { 24, 0} , { 15, 9} , { 8, 25} , { 13, 18} , { 15, 9} , { 13, 19} , { 10, 37} , { 12, 18} , { 6, 29} , { 20, 33} , { 15, 30} , { 4, 45} , { 1, 58} , { 0, 62} , { 7, 61} }, + { CTX_UNUSED , { 12, 38} , { 11, 45} , { 15, 39} , { 11, 42} , { 13, 44} , { 16, 45} , { 12, 41} , { 10, 49} , { 30, 34} , { 18, 42} , { 10, 55} , { 17, 51} , { 17, 46} , { 0, 89} }, + { { 23, -13} , { 26, -13} , { 40, -15} , { 49, -14} , { 44, 3} , { 45, 6} , { 44, 34} , { 33, 54} , { 19, 82} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 26, -19} , { 22, -17} , { 26, -17} , { 30, -25} , { 28, -20} , { 33, -23} , { 37, -27} , { 33, -23} , { 40, -28} , { 38, -17} , { 33, -11} , { 40, -15} , { 41, -6} , { 38, 1} , { 41, 17} }, + { { 30, -6} , { 27, 3} , { 26, 22} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 37, -16} , { 35, -4} , { 38, -8} , { 38, -3} , { 37, 3} , { 38, 5} , { 42, 0} , { 35, 16} , { 39, 22} , { 14, 48} , { 27, 37} , { 21, 60} , { 12, 68} , { 2, 97} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in 
the 4:4:4 common mode + { { 24, 0} , { 15, 9} , { 8, 25} , { 13, 18} , { 15, 9} , { 13, 19} , { 10, 37} , { 12, 18} , { 6, 29} , { 20, 33} , { 15, 30} , { 4, 45} , { 1, 58} , { 0, 62} , { 7, 61} }, + { CTX_UNUSED , { 12, 38} , { 11, 45} , { 15, 39} , { 11, 42} , { 13, 44} , { 16, 45} , { 12, 41} , { 10, 49} , { 30, 34} , { 18, 42} , { 10, 55} , { 17, 51} , { 17, 46} , { 0, 89} }, + { { 23, -13} , { 26, -13} , { 40, -15} , { 49, -14} , { 44, 3} , { 45, 6} , { 44, 34} , { 33, 54} , { 19, 82} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 26, -19} , { 22, -17} , { 26, -17} , { 30, -25} , { 28, -20} , { 33, -23} , { 37, -27} , { 33, -23} , { 40, -28} , { 38, -17} , { 33, -11} , { 40, -15} , { 41, -6} , { 38, 1} , { 41, 17} }, + //Cr in the 4:4:4 common mode + { { 24, 0} , { 15, 9} , { 8, 25} , { 13, 18} , { 15, 9} , { 13, 19} , { 10, 37} , { 12, 18} , { 6, 29} , { 20, 33} , { 15, 30} , { 4, 45} , { 1, 58} , { 0, 62} , { 7, 61} }, + { CTX_UNUSED , { 12, 38} , { 11, 45} , { 15, 39} , { 11, 42} , { 13, 44} , { 16, 45} , { 12, 41} , { 10, 49} , { 30, 34} , { 18, 42} , { 10, 55} , { 17, 51} , { 17, 46} , { 0, 89} }, + { { 23, -13} , { 26, -13} , { 40, -15} , { 49, -14} , { 44, 3} , { 45, 6} , { 44, 34} , { 33, 54} , { 19, 82} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED 
, CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 26, -19} , { 22, -17} , { 26, -17} , { 30, -25} , { 28, -20} , { 33, -23} , { 37, -27} , { 33, -23} , { 40, -28} , { 38, -17} , { 33, -11} , { 40, -15} , { 41, -6} , { 38, 1} , { 41, 17} } + } +}; + +static const char INIT_LAST_P[3][22][15][2] = +{ + //----- model 0 ----- + { + { { 11, 28} , { 2, 40} , { 3, 44} , { 0, 49} , { 0, 46} , { 2, 44} , { 2, 51} , { 0, 47} , { 4, 39} , { 2, 62} , { 6, 46} , { 0, 54} , { 3, 54} , { 2, 58} , { 4, 63} }, + { CTX_UNUSED , { 6, 51} , { 6, 57} , { 7, 53} , { 6, 52} , { 6, 55} , { 11, 45} , { 14, 36} , { 8, 53} , { -1, 82} , { 7, 55} , { -3, 78} , { 15, 46} , { 22, 31} , { -1, 84} }, + { { 9, -2} , { 26, -9} , { 33, -9} , { 39, -7} , { 41, -2} , { 45, 3} , { 49, 9} , { 45, 27} , { 36, 59} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 25, 7} , { 30, -7} , { 28, 3} , { 28, 4} , { 32, 0} , { 34, -1} , { 30, 6} , { 30, 6} , { 32, 9} , { 31, 19} , { 26, 27} , { 26, 30} , { 37, 20} , { 28, 34} , { 17, 70} }, + { { 1, 67} , { 5, 59} , { 9, 67} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 16, 30} , { 18, 32} , { 18, 35} , { 22, 29} , { 24, 31} , { 23, 38} , { 18, 43} , { 20, 41} , { 11, 63} , { 9, 59} , { 9, 64} , { -1, 94} , { -2, 89} , { -9, 108} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { 11, 28} , { 2, 40} , { 3, 44} , { 0, 49} , { 0, 46} , { 2, 44} , { 2, 51} , { 0, 47} , { 4, 39} , { 2, 62} , { 6, 46} , { 0, 54} , { 3, 54} , { 2, 58} , { 4, 63} }, + { CTX_UNUSED , { 6, 51} , { 6, 57} , { 7, 53} , { 6, 52} , { 6, 55} , { 11, 45} , { 14, 36} , { 8, 53} , { -1, 82} , { 7, 55} , { -3, 78} , { 15, 46} , { 22, 31} , { -1, 84} }, + { { 9, -2} , { 26, -9} , { 33, -9} , { 39, -7} , { 41, -2} , { 45, 3} , { 49, 9} , { 45, 27} , { 36, 59} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 25, 7} , { 30, -7} , { 28, 3} , { 28, 4} , { 32, 0} , { 34, -1} , { 30, 6} , { 30, 6} , { 32, 9} , { 31, 19} , { 26, 27} , { 26, 30} , { 37, 20} , { 28, 34} , { 17, 70} }, + //Cr in the 4:4:4 common mode + { { 11, 28} , { 2, 40} , { 3, 44} , { 0, 49} , { 0, 46} , { 2, 44} , { 2, 51} , { 0, 47} , { 4, 39} , { 2, 62} , { 6, 46} , { 0, 54} , { 3, 54} , { 2, 58} , { 4, 63} }, + { CTX_UNUSED , { 6, 51} , { 6, 57} , { 7, 53} , { 6, 52} , { 6, 55} , { 11, 45} , { 14, 36} , { 8, 53} , { -1, 82} , { 7, 55} , { -3, 78} , { 15, 46} , { 22, 31} , { -1, 84} }, + { { 9, -2} , { 26, -9} , { 33, -9} , { 39, -7} , { 41, -2} , { 45, 3} , { 49, 9} , { 45, 27} , { 36, 59} , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 25, 7} , { 30, -7} , { 28, 3} , { 28, 4} , { 32, 0} , { 34, -1} , { 30, 6} , { 30, 6} , { 32, 9} , { 31, 19} , { 26, 27} , { 26, 30} , { 37, 20} , { 28, 34} , { 17, 70} } + }, + //----- model 1 ----- + { + { { 4, 45} , { 10, 28} , { 10, 31} , { 33, -11} , { 52, -43} , { 18, 15} , { 28, 0} , { 35, -22} , { 38, -25} , { 34, 0} , { 39, -18} , { 32, -12} , { 102, -94} , { 0, 0} , { 56, -15} }, + { CTX_UNUSED , { 33, -4} , { 29, 10} , { 37, -5} , { 51, -29} , { 39, -9} , { 52, -34} , { 69, -58} , { 67, -63} , { 44, -5} , { 32, 7} , { 55, -29} , { 32, 1} , { 0, 0} , { 27, 36} }, + { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 33, -25} , { 34, -30} , { 36, -28} , { 38, -28} , { 38, -27} , { 34, -18} , { 35, -16} , { 34, -14} , { 32, -8} , { 37, -6} , { 35, 0} , { 30, 10} , { 28, 18} , { 26, 25} , { 29, 41} }, + { { 0, 75} , { 2, 72} , { 8, 77} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED 
, CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 14, 35} , { 18, 31} , { 17, 35} , { 21, 30} , { 17, 45} , { 20, 42} , { 18, 45} , { 27, 26} , { 16, 54} , { 7, 66} , { 16, 56} , { 11, 73} , { 10, 67} , { -10, 116} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { 4, 45} , { 10, 28} , { 10, 31} , { 33, -11} , { 52, -43} , { 18, 15} , { 28, 0} , { 35, -22} , { 38, -25} , { 34, 0} , { 39, -18} , { 32, -12} , { 102, -94} , { 0, 0} , { 56, -15} }, + { CTX_UNUSED , { 33, -4} , { 29, 10} , { 37, -5} , { 51, -29} , { 39, -9} , { 52, -34} , { 69, -58} , { 67, -63} , { 44, -5} , { 32, 7} , { 55, -29} , { 32, 1} , { 0, 0} , { 27, 36} }, + { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 33, -25} , { 34, -30} , { 36, -28} , { 38, -28} , { 38, -27} , { 34, -18} , { 35, -16} , { 34, -14} , { 32, -8} , { 37, -6} , { 35, 0} , { 30, 10} , { 28, 18} , { 26, 25} , { 29, 41} }, + //Cr in the 4:4:4 common mode + { { 4, 45} , { 10, 28} , { 10, 31} , { 33, -11} , { 52, -43} , { 18, 15} , { 28, 0} , { 35, -22} , { 38, -25} , { 34, 0} , { 39, -18} , { 32, -12} , { 102, 
-94} , { 0, 0} , { 56, -15} }, + { CTX_UNUSED , { 33, -4} , { 29, 10} , { 37, -5} , { 51, -29} , { 39, -9} , { 52, -34} , { 69, -58} , { 67, -63} , { 44, -5} , { 32, 7} , { 55, -29} , { 32, 1} , { 0, 0} , { 27, 36} }, + { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 33, -25} , { 34, -30} , { 36, -28} , { 38, -28} , { 38, -27} , { 34, -18} , { 35, -16} , { 34, -14} , { 32, -8} , { 37, -6} , { 35, 0} , { 30, 10} , { 28, 18} , { 26, 25} , { 29, 41} } + }, + //----- model 2 ----- + { + { { 4, 39} , { 0, 42} , { 7, 34} , { 11, 29} , { 8, 31} , { 6, 37} , { 7, 42} , { 3, 40} , { 8, 33} , { 13, 43} , { 13, 36} , { 4, 47} , { 3, 55} , { 2, 58} , { 6, 60} }, + { CTX_UNUSED , { 8, 44} , { 11, 44} , { 14, 42} , { 7, 48} , { 4, 56} , { 4, 52} , { 13, 37} , { 9, 49} , { 19, 58} , { 10, 48} , { 12, 45} , { 0, 69} , { 20, 33} , { 8, 63} }, + { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 35, -18} , { 33, 
-25} , { 28, -3} , { 24, 10} , { 27, 0} , { 34, -14} , { 52, -44} , { 39, -24} , { 19, 17} , { 31, 25} , { 36, 29} , { 24, 33} , { 34, 15} , { 30, 20} , { 22, 73} }, + { { 20, 34} , { 19, 31} , { 27, 44} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 19, 16} , { 15, 36} , { 15, 36} , { 21, 28} , { 25, 21} , { 30, 20} , { 31, 12} , { 27, 16} , { 24, 42} , { 0, 93} , { 14, 56} , { 15, 57} , { 26, 38} , { -24, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { 4, 39} , { 0, 42} , { 7, 34} , { 11, 29} , { 8, 31} , { 6, 37} , { 7, 42} , { 3, 40} , { 8, 33} , { 13, 43} , { 13, 36} , { 4, 47} , { 3, 55} , { 2, 58} , { 6, 60} }, + { CTX_UNUSED , { 8, 44} , { 11, 44} , { 14, 42} , { 7, 48} , { 4, 56} , { 4, 52} , { 13, 37} , { 9, 49} , { 19, 58} , { 10, 48} , { 12, 45} , { 0, 69} , { 20, 33} , { 8, 63} }, + { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 35, -18} , { 33, -25} , { 28, -3} , { 24, 10} , { 27, 0} , { 34, -14} 
, { 52, -44} , { 39, -24} , { 19, 17} , { 31, 25} , { 36, 29} , { 24, 33} , { 34, 15} , { 30, 20} , { 22, 73} }, + //Cr in the 4:4:4 common mode + { { 4, 39} , { 0, 42} , { 7, 34} , { 11, 29} , { 8, 31} , { 6, 37} , { 7, 42} , { 3, 40} , { 8, 33} , { 13, 43} , { 13, 36} , { 4, 47} , { 3, 55} , { 2, 58} , { 6, 60} }, + { CTX_UNUSED , { 8, 44} , { 11, 44} , { 14, 42} , { 7, 48} , { 4, 56} , { 4, 52} , { 13, 37} , { 9, 49} , { 19, 58} , { 10, 48} , { 12, 45} , { 0, 69} , { 20, 33} , { 8, 63} }, + { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 35, -18} , { 33, -25} , { 28, -3} , { 24, 10} , { 27, 0} , { 34, -14} , { 52, -44} , { 39, -24} , { 19, 17} , { 31, 25} , { 36, 29} , { 24, 33} , { 34, 15} , { 30, 20} , { 22, 73} } + } +}; + +static const char INIT_ONE_I[1][22][5][2] = +{ + //----- model 0 ----- + { + { { -3, 71} , { -6, 42} , { -5, 50} , { -3, 54} , { -2, 62} }, + { { -5, 67} , { -5, 27} , { -3, 39} , { -2, 44} , { 0, 46} }, + { { -3, 75} , { -1, 23} , { 1, 34} , { 1, 43} , { 0, 54} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 92} , { -15, 55} , { -10, 60} , { -6, 62} , { -4, 65} }, + { { -11, 97} , { -20, 84} , { -11, 79} , { -6, 73} , { -4, 74} }, + { { -8, 78} , { -5, 33} , { -4, 48} , { -2, 53} , { -3, 62} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -3, 71} , { -6, 42} , { -5, 50} , { -3, 54} , { -2, 62} }, + { { -5, 67} , { -5, 27} , { -3, 39} , { -2, 44} , { 0, 46} }, + { { -3, 75} , { -1, 23} , { 1, 34} , { 1, 43} , { 0, 54} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 92} , { -15, 55} , { -10, 60} , { -6, 62} , { -4, 65} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { -3, 71} , { -6, 42} , { -5, 50} , { -3, 54} , { -2, 62} }, + { { -5, 67} , { -5, 27} , { -3, 39} , { -2, 44} , { 0, 46} }, + { { -3, 75} , { -1, 23} , { 1, 34} , { 1, 43} , { 0, 54} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 92} , { -15, 55} , { -10, 60} , { -6, 62} , { -4, 65} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_ONE_P[3][22][5][2] = +{ + //----- model 0 ----- + { + { { -6, 76} , { -2, 44} , { 0, 45} , { 0, 52} , { -3, 64} }, + { { -9, 77} , { 3, 24} , { 0, 42} , { 0, 48} , { 0, 55} }, + { { -6, 66} , { -7, 35} , { -7, 42} , { -8, 45} , { -5, 48} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 58} , { -3, 29} , { -1, 36} , { 1, 38} , { 2, 43} }, + { { 0, 70} , { -4, 29} , { 5, 31} , { 7, 42} , { 1, 59} }, + { { 0, 58} , { 8, 5} , { 10, 14} , { 14, 18} , { 13, 27} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -6, 76} , { -2, 44} , { 0, 45} , { 0, 52} , { -3, 64} }, + { { -9, 77} , { 3, 24} , { 0, 42} , { 0, 48} , { 0, 55} }, + { { -6, 66} , { -7, 35} , { -7, 42} , { -8, 45} , { -5, 48} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 58} , { -3, 29} , { -1, 36} , { 1, 38} , { 2, 43} }, + { 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { -6, 76} , { -2, 44} , { 0, 45} , { 0, 52} , { -3, 64} }, + { { -9, 77} , { 3, 24} , { 0, 42} , { 0, 48} , { 0, 55} }, + { { -6, 66} , { -7, 35} , { -7, 42} , { -8, 45} , { -5, 48} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 58} , { -3, 29} , { -1, 36} , { 1, 38} , { 2, 43} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { -23, 112} , { -15, 71} , { -7, 61} , { 0, 53} , { -5, 66} }, + { { -21, 101} , { -3, 39} , { -5, 53} , { -7, 61} , { -11, 75} }, + { { -5, 71} , { 0, 24} , { -1, 36} , { -2, 42} , { -2, 52} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -11, 76} , { -10, 44} , { -10, 52} , { -10, 57} , { -9, 58} }, + { { 2, 66} , { -9, 34} , { 1, 32} , { 11, 31} , { 5, 52} }, + { { 3, 52} , { 7, 4} , { 10, 8} , { 17, 8} , { 16, 19} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -23, 112} , { -15, 71} , { -7, 61} , { 0, 53} , { -5, 66} }, + { { -21, 101} , { -3, 39} , { -5, 53} , { -7, 61} , { -11, 75} }, + { { -5, 71} , { 0, 24} , { -1, 36} , { -2, 42} , { -2, 52} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -11, 76} , { -10, 44} , { -10, 52} , { -10, 57} , { -9, 58} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { -23, 112} , { -15, 71} , { -7, 61} , { 0, 53} , { -5, 66} }, + { { -21, 101} , { -3, 39} , { -5, 53} , { -7, 61} , { -11, 75} }, + { { -5, 71} , { 0, 24} , { -1, 36} , { -2, 42} , { -2, 52} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -11, 76} , { -10, 44} , { -10, 52} , { -10, 57} , { -9, 58} 
}, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { -24, 115} , { -22, 82} , { -9, 62} , { 0, 53} , { 0, 59} }, + { { -21, 100} , { -14, 57} , { -12, 67} , { -11, 71} , { -10, 77} }, + { { -9, 71} , { -7, 37} , { -8, 44} , { -11, 49} , { -10, 56} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -10, 82} , { -8, 48} , { -8, 61} , { -8, 66} , { -7, 70} }, + { { -4, 79} , { -22, 69} , { -16, 75} , { -2, 58} , { 1, 58} }, + { { -13, 81} , { -6, 38} , { -13, 62} , { -6, 58} , { -2, 59} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -24, 115} , { -22, 82} , { -9, 62} , { 0, 53} , { 0, 59} }, + { { -21, 100} , { -14, 57} , { -12, 67} , { -11, 71} , { -10, 77} }, + { { -9, 71} , { -7, 37} , { -8, 44} , { -11, 49} , { -10, 56} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -10, 82} , { -8, 48} , { -8, 61} , { -8, 66} , { -7, 70} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { -24, 115} , { -22, 82} , { -9, 62} , { 0, 53} , { 0, 59} }, + { { -21, 100} , { -14, 57} , { -12, 67} , { -11, 71} , { -10, 77} }, + { { -9, 71} , { -7, 37} , { -8, 44} , { -11, 49} , { -10, 56} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -10, 82} , { -8, 48} , { -8, 61} , { -8, 66} , { -7, 70} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_ABS_I[1][22][5][2] = +{ + //----- model 0 ----- + { + { { 0, 58} , { 1, 63} , { -2, 72} , { -1, 74} , { -9, 91} }, + { { -16, 64} , { -8, 68} , { -10, 78} , { -6, 77} , { -10, 86} }, + { { -2, 55} , { 0, 61} , { 1, 64} , { 0, 68} , { -9, 92} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED }, + { { -12, 73} , { -8, 76} , { -7, 80} , { -9, 88} , { -17, 110} }, + { { -13, 86} , { -13, 96} , { -11, 97} , { -19, 117} , CTX_UNUSED }, + { { -13, 71} , { -10, 79} , { -12, 86} , { -13, 90} , { -14, 97} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { 0, 58} , { 1, 63} , { -2, 72} , { -1, 74} , { -9, 91} }, + { { -16, 64} , { -8, 68} , { -10, 78} , { -6, 77} , { -10, 86} }, + { { -2, 55} , { 0, 61} , { 1, 64} , { 0, 68} , { -9, 92} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 73} , { -8, 76} , { -7, 80} , { -9, 88} , { -17, 110} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { 0, 58} , { 1, 63} , { -2, 72} , { -1, 74} , { -9, 91} }, + { { -16, 64} , { -8, 68} , { -10, 78} , { -6, 77} , { -10, 86} }, + { { -2, 55} , { 0, 61} , { 1, 64} , { 0, 68} , { -9, 92} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 73} , { -8, 76} , { -7, 80} , { -9, 88} , { -17, 110} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + +static const char INIT_ABS_P[3][22][5][2] = +{ + //----- model 0 ----- + { + { { -2, 59} , { -4, 70} , { -4, 75} , { -8, 82} , { -17, 102} }, + { { -6, 59} , { -7, 71} , { -12, 83} , { -11, 87} , { -30, 119} }, + { { -12, 56} , { -6, 60} , { -5, 62} , { -8, 66} , { -8, 76} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 55} , { 0, 58} , { 0, 64} , { -3, 74} , { -10, 90} }, + { { -2, 58} , { -3, 72} , { -3, 81} , { -11, 97} , CTX_UNUSED }, + { { 2, 40} , { 0, 58} , { -3, 70} , { -6, 79} , { -8, 85} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED 
}, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -2, 59} , { -4, 70} , { -4, 75} , { -8, 82} , { -17, 102} }, + { { -6, 59} , { -7, 71} , { -12, 83} , { -11, 87} , { -30, 119} }, + { { -12, 56} , { -6, 60} , { -5, 62} , { -8, 66} , { -8, 76} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 55} , { 0, 58} , { 0, 64} , { -3, 74} , { -10, 90} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { -2, 59} , { -4, 70} , { -4, 75} , { -8, 82} , { -17, 102} }, + { { -6, 59} , { -7, 71} , { -12, 83} , { -11, 87} , { -30, 119} }, + { { -12, 56} , { -6, 60} , { -5, 62} , { -8, 66} , { -8, 76} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 55} , { 0, 58} , { 0, 64} , { -3, 74} , { -10, 90} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + }, + //----- model 1 ----- + { + { { -11, 77} , { -9, 80} , { -9, 84} , { -10, 87} , { -34, 127} }, + { { -15, 77} , { -17, 91} , { -25, 107} , { -25, 111} , { -28, 122} }, + { { -9, 57} , { -6, 63} , { -4, 65} , { -4, 67} , { -7, 82} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -16, 72} , { -7, 69} , { -4, 69} , { -5, 74} , { -9, 86} }, + { { -2, 55} , { -2, 67} , { 0, 73} , { -8, 89} , CTX_UNUSED }, + { { 3, 37} , { -1, 61} , { -5, 73} , { -1, 70} , { -4, 78} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -11, 77} , { -9, 80} , { -9, 84} , { -10, 87} , { -34, 127} }, + { { -15, 77} , { -17, 91} , { -25, 107} , { -25, 111} , { -28, 122} }, + { { -9, 57} , { -6, 63} , { -4, 65} , { -4, 67} , { -7, 82} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -16, 72} , { -7, 69} 
, { -4, 69} , { -5, 74} , { -9, 86} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { -11, 77} , { -9, 80} , { -9, 84} , { -10, 87} , { -34, 127} }, + { { -15, 77} , { -17, 91} , { -25, 107} , { -25, 111} , { -28, 122} }, + { { -9, 57} , { -6, 63} , { -4, 65} , { -4, 67} , { -7, 82} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -16, 72} , { -7, 69} , { -4, 69} , { -5, 74} , { -9, 86} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + }, + //----- model 2 ----- + { + { { -14, 85} , { -13, 89} , { -13, 94} , { -11, 92} , { -29, 127} }, + { { -21, 85} , { -16, 88} , { -23, 104} , { -15, 98} , { -37, 127} }, + { { -12, 59} , { -8, 63} , { -9, 67} , { -6, 68} , { -10, 79} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -14, 75} , { -10, 79} , { -9, 83} , { -12, 92} , { -18, 108} }, + { { -13, 78} , { -9, 83} , { -4, 81} , { -13, 99} , CTX_UNUSED }, + { { -16, 73} , { -10, 76} , { -13, 86} , { -9, 83} , { -10, 87} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cb in the 4:4:4 common mode + { { -14, 85} , { -13, 89} , { -13, 94} , { -11, 92} , { -29, 127} }, + { { -21, 85} , { -16, 88} , { -23, 104} , { -15, 98} , { -37, 127} }, + { { -12, 59} , { -8, 63} , { -9, 67} , { -6, 68} , { -10, 79} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -14, 75} , { -10, 79} , { -9, 83} , { -12, 92} , { -18, 108} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + //Cr in the 4:4:4 common mode + { { -14, 85} , { -13, 89} , { -13, 94} , { -11, 92} , { -29, 127} }, + { { -21, 85} , { -16, 88} , { -23, 104} , { -15, 98} , { -37, 127} }, + { { -12, 59} , { -8, 63} , { -9, 67} , { -6, 68} , { -10, 79} }, + { CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -14, 75} , { -10, 79} , { -9, 83} , { -12, 92} , { -18, 108} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } +}; + + + +#if ENABLE_FIELD_CTX +static const char INIT_FLD_MAP_I[1][8][15][2] = +{ + //----- model 0 ----- + { + { { -6, 93} , { -6, 84} , { -8, 79} , { 0, 66} , { -1, 71} , { 0, 62} , { -2, 60} , { -2, 59} , { -5, 75} , { -3, 62} , { -4, 58} , { -9, 66} , { -1, 79} , { 0, 71} , { 3, 68} }, + { CTX_UNUSED , { 10, 44} , { -7, 62} , { 15, 36} , { 14, 40} , { 16, 27} , { 12, 29} , { 1, 44} , { 20, 36} , { 18, 32} , { 5, 42} , { 1, 48} , { 10, 62} , { 17, 46} , { 9, 64} }, + { { -14, 106} , { -13, 97} , { -15, 90} , { -12, 90} , { -18, 88} , { -10, 73} , { -9, 79} , { -14, 86} , { -10, 73} , { -10, 70} , { -10, 69} , { -5, 66} , { -9, 64} , { -5, 58} , { 2, 59} }, +// { { -1, 73} , { -7, 73} , { -6, 76} , { -7, 71} , { -9, 72} , { -5, 65} , { -14, 83} , { -8, 72} , { -10, 75} , { -5, 64} , { -4, 59} , { -13, 79} , { -9, 69} , { -8, 66} , { 3, 55} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 104} , { -11, 97} , { -16, 96} , { -7, 88} , { -8, 85} , { -7, 85} , { -9, 85} , { -13, 88} , { 4, 66} , { -3, 77} , { -3, 76} , { -6, 76} , { 10, 58} , { -1, 76} , { -1, 83} }, + { { -7, 99} , { -14, 95} , { 2, 95} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 0, 76} , { -5, 74} , { 0, 70} , { -11, 75} , { 1, 68} , { 0, 65} , { -14, 73} , { 3, 62} , { 4, 62} , { -1, 68} , { -13, 75} , { 11, 55} , 
{ 5, 64} , { 12, 70} } + } +}; + +static const char INIT_FLD_MAP_P[3][8][15][2] = +{ + //----- model 0 ----- + { + { { -13, 106} , { -16, 106} , { -10, 87} , { -21, 114} , { -18, 110} , { -14, 98} , { -22, 110} , { -21, 106} , { -18, 103} , { -21, 107} , { -23, 108} , { -26, 112} , { -10, 96} , { -12, 95} , { -5, 91} }, + { CTX_UNUSED , { -9, 93} , { -22, 94} , { -5, 86} , { 9, 67} , { -4, 80} , { -10, 85} , { -1, 70} , { 7, 60} , { 9, 58} , { 5, 61} , { 12, 50} , { 15, 50} , { 18, 49} , { 17, 54} }, + { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} }, +// { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 10, 41} , { 7, 46} , { -1, 51} , { 7, 49} , { 8, 52} , { 9, 41} , { 6, 47} , { 2, 55} , { 13, 41} , { 10, 44} , { 6, 50} , { 5, 53} , { 13, 49} , { 4, 63} , { 6, 64} }, + { { -2, 69} , { -2, 59} , { 6, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 10, 44} , { 9, 31} , { 12, 43} , { 3, 53} , { 14, 34} , { 10, 38} , { -3, 52} , { 13, 40} , { 17, 32} , { 7, 44} , { 7, 38} , { 13, 50} , { 10, 57} , { 26, 43} } + }, + //----- model 1 ----- + { + { { -21, 126} , { -23, 124} , { -20, 110} , { -26, 126} , { -25, 124} , { -17, 105} , { -27, 121} , { -27, 117} , { -17, 102} , { -26, 
117} , { -27, 116} , { -33, 122} , { -10, 95} , { -14, 100} , { -8, 95} }, + { CTX_UNUSED , { -17, 111} , { -28, 114} , { -6, 89} , { -2, 80} , { -4, 82} , { -9, 85} , { -8, 81} , { -1, 72} , { 5, 64} , { 1, 67} , { 9, 56} , { 0, 69} , { 1, 69} , { 7, 69} }, + { { -3, 81} , { -3, 76} , { -7, 72} , { -6, 78} , { -12, 72} , { -14, 68} , { -3, 70} , { -6, 76} , { -5, 66} , { -5, 62} , { 0, 57} , { -4, 61} , { -9, 60} , { 1, 54} , { 2, 58} }, +// { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -7, 69} , { -6, 67} , { -16, 77} , { -2, 64} , { 2, 61} , { -6, 67} , { -3, 64} , { 2, 57} , { -3, 65} , { -3, 66} , { 0, 62} , { 9, 51} , { -1, 66} , { -2, 71} , { -2, 75} }, + { { -1, 70} , { -9, 72} , { 14, 60} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 16, 37} , { 0, 47} , { 18, 35} , { 11, 37} , { 12, 41} , { 10, 41} , { 2, 48} , { 12, 41} , { 13, 41} , { 0, 59} , { 3, 50} , { 19, 40} , { 3, 66} , { 18, 50} } + }, + //----- model 2 ----- + { + { { -22, 127} , { -25, 127} , { -25, 120} , { -27, 127} , { -19, 114} , { -23, 117} , { -25, 118} , { -26, 117} , { -24, 113} , { -28, 118} , { -31, 120} , { -37, 124} , { -10, 94} , { -15, 102} , { -10, 99} }, + { CTX_UNUSED , { -13, 106} , { -50, 127} , { -5, 92} , { 17, 57} , { -5, 86} , { -13, 94} , { -12, 91} , { -2, 77} , { 0, 71} , { -1, 73} , { 4, 64} , { -7, 81} , { 5, 
64} , { 15, 57} }, + { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} }, +// { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 67} , { 0, 68} , { -10, 67} , { 1, 68} , { 0, 77} , { 2, 64} , { 0, 68} , { -5, 78} , { 7, 55} , { 5, 59} , { 2, 65} , { 14, 54} , { 15, 44} , { 5, 60} , { 2, 70} }, + { { -2, 76} , { -18, 86} , { 12, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 5, 64} , { -12, 70} , { 11, 55} , { 5, 56} , { 0, 69} , { 2, 65} , { -6, 74} , { 5, 54} , { 7, 54} , { -6, 76} , { -11, 82} , { -2, 77} , { -2, 77} , { 25, 42} } + } +}; + +static const char INIT_FLD_LAST_I[1][8][15][2] = +{ + //----- model 0 ----- + { + { { 15, 6} , { 6, 19} , { 7, 16} , { 12, 14} , { 18, 13} , { 13, 11} , { 13, 15} , { 15, 16} , { 12, 23} , { 13, 23} , { 15, 20} , { 14, 26} , { 14, 44} , { 17, 40} , { 17, 47} }, + { CTX_UNUSED , { 24, 17} , { 21, 21} , { 25, 22} , { 31, 27} , { 22, 29} , { 19, 35} , { 14, 50} , { 10, 57} , { 7, 63} , { -2, 77} , { -4, 82} , { -3, 94} , { 9, 69} , { -12, 109} }, + { { 21, -10} , { 24, -11} , { 28, -8} , { 28, -1} , { 29, 3} , { 29, 9} , { 35, 20} , { 29, 36} , { 14, 67} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, 
+// { { 12, 33} , { 5, 38} , { 9, 34} , { 18, 22} , { 19, 22} , { 23, 19} , { 26, 16} , { 14, 44} , { 40, 14} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 36, -35} , { 36, -34} , { 32, -26} , { 37, -30} , { 44, -32} , { 34, -18} , { 34, -15} , { 40, -15} , { 33, -7} , { 35, -5} , { 33, 0} , { 38, 2} , { 33, 13} , { 23, 35} , { 13, 58} }, + { { 29, -3} , { 26, 0} , { 22, 30} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 31, -7} , { 35, -15} , { 34, -3} , { 34, 3} , { 36, -1} , { 34, 5} , { 32, 11} , { 35, 5} , { 34, 12} , { 39, 11} , { 30, 29} , { 34, 26} , { 29, 39} , { 19, 66} } + } +}; + +static const char INIT_FLD_LAST_P[3][8][15][2] = +{ + //----- model 0 ----- + { + { { 14, 11} , { 11, 14} , { 9, 11} , { 18, 11} , { 21, 9} , { 23, -2} , { 32, -15} , { 32, -15} , { 34, -21} , { 39, -23} , { 42, -33} , { 41, -31} , { 46, -28} , { 38, -12} , { 21, 29} }, + { CTX_UNUSED , { 45, -24} , { 53, -45} , { 48, -26} , { 65, -43} , { 43, -19} , { 39, -10} , { 30, 9} , { 18, 26} , { 20, 27} , { 0, 57} , { -14, 82} , { -5, 75} , { -19, 97} , { -35, 125} }, + { { 21, -13} , { 33, -14} , { 39, -7} , { 46, -2} , { 51, 2} , { 60, 6} , { 61, 17} , { 55, 34} , { 42, 62} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, +// { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 27, 0} , { 28, 0} , { 31, -4} , { 27, 6} , { 34, 8} , { 30, 10} , { 24, 22} , { 33, 19} , { 22, 32} , { 26, 31} , { 21, 41} , { 26, 44} , { 23, 47} , { 16, 65} , { 14, 71} }, + { { 8, 60} , { 6, 63} , { 17, 65} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 21, 24} , { 23, 20} , { 26, 23} , { 27, 32} , { 28, 23} , { 28, 24} , { 23, 40} , { 24, 32} , { 28, 29} , { 23, 42} , { 19, 57} , { 22, 53} , { 22, 61} , { 11, 86} } + }, + //----- model 1 ----- + { + { { 19, -6} , { 18, -6} , { 14, 0} , { 26, -12} , { 31, -16} , { 33, -25} , { 33, -22} , { 37, -28} , { 39, -30} , { 42, -30} , { 47, -42} , { 45, -36} , { 49, -34} , { 41, -17} , { 32, 9} }, + { CTX_UNUSED , { 69, -71} , { 63, -63} , { 66, -64} , { 77, -74} , { 54, -39} , { 52, -35} , { 41, -10} , { 36, 0} , { 40, -1} , { 30, 14} , { 28, 26} , { 23, 37} , { 12, 55} , { 11, 65} }, + { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, +// { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 37, -33} , { 39, -36} , { 40, -37} , { 38, -30} , { 46, -33} , { 42, -30} , { 40, -24} , { 49, -29} , { 38, -12} , { 40, -10} , { 38, -3} , { 46, -5} , { 31, 20} , { 29, 30} , { 25, 44} }, + { { 12, 48} , { 11, 49} , { 26, 45} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 22, 22} , { 23, 22} , { 27, 21} , { 33, 20} , { 26, 28} , { 30, 24} , { 27, 34} , { 18, 42} , { 25, 39} , { 18, 50} , { 12, 70} , { 21, 54} , { 14, 71} , { 11, 83} } + }, + //----- model 2 ----- + { + { { 17, -13} , { 16, -9} , { 17, -12} , { 27, -21} , { 37, -30} , { 41, -40} , { 42, -41} , { 48, -47} , { 39, -32} , { 46, -40} , { 52, -51} , { 46, -41} , { 52, -39} , { 43, -19} , { 32, 11} }, + { CTX_UNUSED , { 61, -55} , { 56, -46} , { 62, -50} , { 81, -67} , { 45, -20} , { 35, -2} , { 28, 15} , { 34, 1} , { 39, 1} , { 30, 17} , { 20, 38} , { 18, 45} , { 15, 54} , { 0, 79} }, + { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, +// { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 36, -16} , { 37, 
-14} , { 37, -17} , { 32, 1} , { 34, 15} , { 29, 15} , { 24, 25} , { 34, 22} , { 31, 16} , { 35, 18} , { 31, 28} , { 33, 41} , { 36, 28} , { 27, 47} , { 21, 62} }, + { { 18, 31} , { 19, 26} , { 36, 24} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 24, 23} , { 27, 16} , { 24, 30} , { 31, 29} , { 22, 41} , { 22, 42} , { 16, 60} , { 15, 52} , { 14, 60} , { 3, 78} , { -16, 123} , { 21, 53} , { 22, 56} , { 25, 61} } + } +}; +#endif + + +#endif + diff --git a/Src/h264dec/lcommon/inc/enc_statistics.h b/Src/h264dec/lcommon/inc/enc_statistics.h new file mode 100644 index 00000000..534a7d4c --- /dev/null +++ b/Src/h264dec/lcommon/inc/enc_statistics.h @@ -0,0 +1,60 @@ +/*! + ************************************************************************** + * \file enc_statistics.h + * + * \brief + * statistics reports for the encoding process. + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Tourapis <alexismt@ieee.org> + * - Karsten Sühring <suehring@hhi.de> + * + ************************************************************************** + */ + +#ifndef _ENC_STATISTICS_H_ +#define _ENC_STATISTICS_H_ +#include "global.h" + +struct stat_parameters +{ + float bitr; //!< bit rate for current frame, used only for output to the terminal + float bitrate; //!< average bit rate for the sequence except first frame + int64 bit_ctr; //!< counter for bit usage + int64 bit_ctr_n; //!< bit usage for the current frame + int64 bit_ctr_emulationprevention; //!< stored bits needed to prevent start code emulation + int bit_slice; //!< number of bits in current slice + int stored_bit_slice; //!< keep number of bits in current slice (to restore status in case of MB re-encoding) + int b8_mode_0_use [NUM_SLICE_TYPES][2]; + int64 mode_use_transform[NUM_SLICE_TYPES][MAXMODE][2]; + int64 intra_chroma_mode[4]; + 
+ // B pictures + int NumberBFrames; + + int frame_counter; + int64 quant [NUM_SLICE_TYPES]; + int64 num_macroblocks [NUM_SLICE_TYPES]; + int frame_ctr [NUM_SLICE_TYPES]; + int64 bit_counter [NUM_SLICE_TYPES]; + float bitrate_st [NUM_SLICE_TYPES]; + int64 mode_use [NUM_SLICE_TYPES][MAXMODE]; //!< Macroblock mode usage, indexed by slice type and mode + int64 bit_use_mode [NUM_SLICE_TYPES][MAXMODE]; //!< statistics of bit usage, indexed by slice type and mode + int64 bit_use_mb_type [NUM_SLICE_TYPES]; + int64 bit_use_header [NUM_SLICE_TYPES]; + int64 tmp_bit_use_cbp [NUM_SLICE_TYPES]; + int64 bit_use_coeffC [NUM_SLICE_TYPES]; + int64 bit_use_coeff [3][NUM_SLICE_TYPES]; + int64 bit_use_delta_quant [NUM_SLICE_TYPES]; + int64 bit_use_stuffingBits[NUM_SLICE_TYPES]; + + int bit_ctr_parametersets; + int bit_ctr_parametersets_n; + int64 bit_ctr_filler_data; + int64 bit_ctr_filler_data_n; + +}; +typedef struct stat_parameters StatParameters; + +#endif diff --git a/Src/h264dec/lcommon/inc/frame.h b/Src/h264dec/lcommon/inc/frame.h new file mode 100644 index 00000000..25507d51 --- /dev/null +++ b/Src/h264dec/lcommon/inc/frame.h @@ -0,0 +1,50 @@ + +/*! 
+ ************************************************************************ + * \file frame.h + * + * \brief + * headers for frame format related information + * + * \author + * + ************************************************************************ + */ +#ifndef H264_FRAME_H_ +#define H264_FRAME_H_ +#pragma once + +typedef enum { + CM_UNKNOWN = -1, + CM_YUV = 0, + CM_RGB = 1, + CM_XYZ = 2 +} ColorModel; + +typedef enum { + CF_UNKNOWN = -1, //!< Unknown color format + YUV400 = 0, //!< Monochrome + YUV420 = 1, //!< 4:2:0 + YUV422 = 2, //!< 4:2:2 + YUV444 = 3 //!< 4:4:4 +} ColorFormat; + +typedef struct frame_format +{ + ColorFormat yuv_format; //!< YUV format (0=4:0:0, 1=4:2:0, 2=4:2:2, 3=4:4:4) + int width; //!< luma component frame width + int height; //!< luma component frame height + int height_cr; //!< chroma component frame height + int width_cr; //!< chroma component frame width + int width_crop; //!< width after cropping consideration + int height_crop; //!< height after cropping consideration + int mb_width; //!< frame width, presumably in macroblock units (TODO confirm against users of this field) + int mb_height; //!< frame height, presumably in macroblock units (TODO confirm against users of this field) + int size_cmp[3]; //!< component sizes + int size; //!< total image size + int bit_depth[3]; //!< component bit depth + int max_value[3]; //!< component max value + int max_value_sq[3]; //!< component max value squared +} FrameFormat; + +#endif diff --git a/Src/h264dec/lcommon/inc/ifunctions.h b/Src/h264dec/lcommon/inc/ifunctions.h new file mode 100644 index 00000000..0d0e86d8 --- /dev/null +++ b/Src/h264dec/lcommon/inc/ifunctions.h @@ -0,0 +1,251 @@ + +/*! + ************************************************************************ + * \file + * ifunctions.h + * + * \brief + * define some inline functions that are used within the encoder. 
+ * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Sühring <suehring@hhi.de> + * - Alexis Tourapis <alexismt@ieee.org> + * + ************************************************************************ + */ +#ifndef _IFUNCTIONS_H_ +#define _IFUNCTIONS_H_ + +# if !defined(WIN32) && (__STDC_VERSION__ < 199901L) + #define static + #define inline +#endif +#include <math.h> +#include <limits.h> + + +static inline short smin(short a, short b) +{ + return (short) (((a) < (b)) ? (a) : (b)); +} + +static inline short smax(short a, short b) +{ + return (short) (((a) > (b)) ? (a) : (b)); +} + +static inline int imin(int a, int b) +{/* + int retu; + _asm + { + mov eax, a + mov edx, b + cmp edx, eax + cmovle eax, edx + mov retu, eax + } + return retu;*/ + return ((a) < (b)) ? (a) : (b); +} + +static inline int imax(int a, int b) +{ + return ((a) > (b)) ? (a) : (b); +} + +static inline double dmin(double a, double b) +{ + return ((a) < (b)) ? (a) : (b); +} + +static inline double dmax(double a, double b) +{ + return ((a) > (b)) ? (a) : (b); +} + +static inline int64 i64min(int64 a, int64 b) +{ + return ((a) < (b)) ? (a) : (b); +} + +static inline int64 i64max(int64 a, int64 b) +{ + return ((a) > (b)) ? (a) : (b); +} + + +static inline short sabs(short x) +{ + static const short SHORT_BITS = (sizeof(short) * CHAR_BIT) - 1; + short y = (short) (x >> SHORT_BITS); + return (short) ((x ^ y) - y); +} + +static inline int iabs(int x) +{ + static const int INT_BITS = (sizeof(int) * CHAR_BIT) - 1; + int y = x >> INT_BITS; + return (x ^ y) - y; +} + +static inline double dabs(double x) +{ + return ((x) < 0) ? 
-(x) : (x); +} + +static inline int64 i64abs(int64 x) +{ + static const int64 INT64_BITS = (sizeof(int64) * CHAR_BIT) - 1; + int64 y = x >> INT64_BITS; + return (x ^ y) - y; +} + +static inline double dabs2(double x) +{ + return (x) * (x); +} + +static inline int iabs2(int x) +{ + return (x) * (x); +} + +static inline int64 i64abs2(int64 x) +{ + return (x) * (x); +} + +static inline int isign(int x) +{ + return ( (x > 0) - (x < 0)); +} + +static inline int isignab(int a, int b) +{ + return ((b) < 0) ? -iabs(a) : iabs(a); +} + +static inline int rshift_rnd(int x, int a) +{ + return (a > 0) ? ((x + (1 << (a-1) )) >> a) : (x << (-a)); +} + +static inline int rshift_rnd_pos(int x, int a) +{ + return (x + (1 << (a-1) )) >> a; +} + +// flip a before calling +static inline int rshift_rnd_nonpos(int x, int a) +{ + return (x << a); +} + +static inline int rshift_rnd_sign(int x, int a) +{ + return (x > 0) ? ( ( x + (1 << (a-1)) ) >> a ) : (-( ( iabs(x) + (1 << (a-1)) ) >> a )); +} + +static inline unsigned int rshift_rnd_us(unsigned int x, unsigned int a) +{ + return (a > 0) ? 
((x + (1 << (a-1))) >> a) : x; +} + +static inline int rshift_rnd_sf(int x, int a) +{ + return ((x + (1 << (a-1) )) >> a); +} + +static inline unsigned int rshift_rnd_us_sf(unsigned int x, unsigned int a) +{ + return ((x + (1 << (a-1))) >> a); +} + +static inline int iClip1(int high, int x) +{ + if (x < 0) + return 0; + if (x > high) + return high; + return x; + /* old: + x = imax(x, 0); + x = imin(x, high); + + return x;*/ +} + +static inline int iClip3(int low, int high, int x) +{ + if (x < low) + return low; + if (x > high) + return high; + return x; + /* old: + x = imax(x, low); + x = imin(x, high); + + return x;*/ +} + +static inline short sClip3(short low, short high, short x) +{ + x = smax(x, low); + x = smin(x, high); + + return x; +} + +static inline double dClip3(double low, double high, double x) +{ + x = dmax(x, low); + x = dmin(x, high); + + return x; +} + +static inline int weighted_cost(int factor, int bits) +{ + return (((factor)*(bits))>>LAMBDA_ACCURACY_BITS); +} + +static inline int RSD(int x) +{ + return ((x&2)?(x|1):(x&(~1))); +} + +static inline int power2(int x) +{ + return 1 << (x); +} + +static inline int float2int (float x) +{ + return (int)((x < 0) ? (x - 0.5f) : (x + 0.5f)); +} + + + +#if ZEROSNR +static inline float psnr(int max_sample_sq, int samples, float sse_distortion ) +{ + return (float) (10.0 * log10(max_sample_sq * (double) ((double) samples / (sse_distortion < 1.0 ? 1.0 : sse_distortion)))); +} +#else +static inline float psnr(int max_sample_sq, int samples, float sse_distortion ) +{ + return (float) (sse_distortion == 0.0 ? 0.0 : (10.0 * log10(max_sample_sq * (double) ((double) samples / sse_distortion)))); +} +#endif + + +# if !defined(WIN32) && (__STDC_VERSION__ < 199901L) + #undef static + #undef inline +#endif + +#endif + diff --git a/Src/h264dec/lcommon/inc/img_io.h b/Src/h264dec/lcommon/inc/img_io.h new file mode 100644 index 00000000..7d57d03f --- /dev/null +++ b/Src/h264dec/lcommon/inc/img_io.h @@ -0,0 +1,28 @@ +/*! 
+ ************************************************************************************* + * \file img_io.h + * + * \brief + * image I/O related functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + ************************************************************************************* + */ +#include "global.h" + +#ifndef _IMG_IO_H_ +#define _IMG_IO_H_ + +#include "io_video.h" + +extern int ParseSizeFromString (VideoDataFile *input_file, int *xlen, int *ylen, double *fps); +extern void ParseFrameNoFormatFromString (VideoDataFile *input_file); +extern void OpenFrameFile (VideoDataFile *input_file, int FrameNumberInFile); +extern void OpenFiles (VideoDataFile *input_file); +extern void CloseFiles (VideoDataFile *input_file); +extern VideoFileType ParseVideoType (VideoDataFile *input_file); + +#endif + diff --git a/Src/h264dec/lcommon/inc/mb_access.h b/Src/h264dec/lcommon/inc/mb_access.h new file mode 100644 index 00000000..0bad3bca --- /dev/null +++ b/Src/h264dec/lcommon/inc/mb_access.h @@ -0,0 +1,70 @@ + +/*! 
+ ************************************************************************************* + * \file mb_access.h + * + * \brief + * Functions for macroblock neighborhoods + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Sühring <suehring@hhi.de> + * - Alexis Michael Tourapis <alexismt@ieee.org> + ************************************************************************************* + */ + +#ifndef _MB_ACCESS_H_ +#define _MB_ACCESS_H_ + +extern void CheckAvailabilityOfNeighbors(Macroblock *currMB); + +/* MB Aff */ +extern void getAffNeighbour (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); +extern void getAffNeighbourLuma (const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getAffNeighbourXPLuma (const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getAffNeighbourPPLumaNB (const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getAffNeighbourNPLuma (const Macroblock *currMB, int yN, PixelPos *pix); +extern void getAffNeighbourN0Luma (const Macroblock *currMB, PixelPos *pix); +extern void getAffNeighbourNXLuma (const Macroblock *currMB, int xN, PixelPos *pix); +extern void getAffNeighbour0X (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); +extern void getAffNeighbour0XLuma (const Macroblock *currMB, int yN, PixelPos *pix); +extern void getAffNeighbour0N (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); +extern void getAffNeighbourX0 (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix); +extern void getAffNeighbourNX (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); +extern void getAffNeighbourN0 (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); +extern void getAffNeighbour0NLuma (const Macroblock *currMB, PixelPos *pix); +extern void getAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern 
void getAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix); +/* normal */ +extern void getNonAffNeighbour (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbourXP_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbourPX_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbourLuma (const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getNonAffNeighbourXPLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getNonAffNeighbourPXLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getNonAffNeighbourN0Luma(const Macroblock *currMB, PixelPos *pix); +extern void getNonAffNeighbourNXLuma(const Macroblock *currMB, int yN, PixelPos *pix); +extern void getNonAffNeighbourN0 (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbour0N (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbourNX (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbourNP (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbourNPChromaNB(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbour0X (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbour0XLuma(const Macroblock *currMB, int yN, PixelPos *pix); +extern void getNonAffNeighbourX0 (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix); +extern void getNonAffNeighbour0NLuma(const Macroblock *currMB, PixelPos *pix); +extern void getNonAffNeighbourNPLumaNB(const Macroblock *currMB, int yN, PixelPos *pix); +extern void getNonAffNeighbourXPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos 
*pix); +extern void getNonAffNeighbourPPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getNonAffNeighbourXPLumaNB_NoPos(const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getNonAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix); +extern void getNonAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix); +extern void get4x4Neighbour (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); +extern void get4x4NeighbourLuma (const Macroblock *currMB, int block_x, int block_y, PixelPos *pix); +extern Boolean mb_is_available (int mbAddr, const Macroblock *currMB); +extern void get_mb_pos (VideoParameters *p_Vid, int mb_addr, const int mb_size[2], short *x, short *y); +extern void get_mb_block_pos_normal (const h264_pic_position *PicPos, int mb_addr, short *x, short *y); +extern void get_mb_block_pos_mbaff (const h264_pic_position *PicPos, int mb_addr, short *x, short *y); + + +#endif diff --git a/Src/h264dec/lcommon/inc/memalloc.h b/Src/h264dec/lcommon/inc/memalloc.h new file mode 100644 index 00000000..fb4c3132 --- /dev/null +++ b/Src/h264dec/lcommon/inc/memalloc.h @@ -0,0 +1,71 @@ + +/*! 
+ ************************************************************************ + * \file memalloc.h + * + * \brief + * Memory allocation and free helper functions + * The get_mem2D / get_mem2Dint / get_mem2Dpel definitions in memalloc.c + * return the allocated size in bytes; the other int-returning get_mem* + * allocators presumably follow the same convention (confirm in memalloc.c). + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Sühring <suehring@hhi.de> + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************ + */ + +#ifndef _MEMALLOC_H_ +#define _MEMALLOC_H_ + +#include "global.h" +#include "quant_params.h" + +#if defined(USEMMX) // && (IMGTYPE == 0) // MMX, SSE, SSE2 intrinsic support +#if defined(_MSC_VER) || defined(__INTEL_COMPILER) // ICC +# include <emmintrin.h> +# else +# include <xmmintrin.h> +# endif +#endif + +extern int get_mem2D(byte ***array2D, int dim0, int dim1); +extern int get_mem3D(byte ****array3D, int dim0, int dim1, int dim2); +extern int get_mem4D(byte *****array4D, int dim0, int dim1, int dim2, int dim3); + +extern int get_mem2Dint(int ***array2D, int rows, int columns); +extern int get_mem3Dint(int ****array3D, int frames, int rows, int columns); +extern int get_mem4Dint(int *****array4D, int idx, int frames, int rows, int columns ); + +extern int get_mem2DPicMotion(struct pic_motion ***array3D, int rows, int columns); +extern int get_mem3Dref(h264_ref_t ****array3D, int frames, int rows, int columns); + +extern int get_mem2Dshort(short ***array2D, int dim0, int dim1); +extern MotionVector ***get_mem3DMotionVector(int dim0, int dim1, int dim2); +extern int get_mem4Dshort(short *****array4D, int dim0, int dim1, int dim2, int dim3); +extern int get_mem2Dpel(imgpel ***array2D, int rows, int columns); + +extern struct video_image *get_memImage(int width, int height); +extern void free_memImage(struct video_image *image); + +extern void free_mem2D (byte **array2D); +extern void free_mem3D (byte ***array3D); +extern void free_mem4D (byte ****array4D); +// release helpers for int arrays +extern void free_mem2Dint (int **array2D); +extern void free_mem3Dint 
(int ***array3D); + +extern void free_mem3Dref(h264_ref_t ***array3D); +extern void free_mem2DPicMotion(struct pic_motion **array3D); +// +extern void free_mem2Dshort(short **array2D); + +extern void free_mem3DMotionVector(MotionVector ***); + +extern void free_mem2Dpel (imgpel **array2D); +extern int init_top_bot_planes(imgpel **imgFrame, int height, imgpel ***imgTopField, imgpel ***imgBotField); +extern void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField); + +extern void no_mem_exit(char *where); + + +#endif diff --git a/Src/h264dec/lcommon/inc/memcache.h b/Src/h264dec/lcommon/inc/memcache.h new file mode 100644 index 00000000..26131827 --- /dev/null +++ b/Src/h264dec/lcommon/inc/memcache.h @@ -0,0 +1,24 @@ +#pragma once +typedef struct image_cache +{ + int size_x, size_y; + struct video_image *head; +} ImageCache; + +void image_cache_set_dimensions(ImageCache *cache, int width, int height); +int image_cache_dimensions_match(ImageCache *cache, int width, int height); +void image_cache_add(ImageCache *cache, struct video_image *image); +struct video_image *image_cache_get(ImageCache *cache); +void image_cache_flush(ImageCache *cache); + +typedef struct motion_cache +{ + int size_x, size_y; + struct pic_motion **head; +} MotionCache; + +void motion_cache_set_dimensions(MotionCache *cache, int width, int height); +int motion_cache_dimensions_match(MotionCache *cache, int width, int height); +void motion_cache_add(MotionCache *cache, struct pic_motion **image); +struct pic_motion **motion_cache_get(MotionCache *cache); +void motion_cache_flush(MotionCache *cache);
\ No newline at end of file diff --git a/Src/h264dec/lcommon/inc/mv_prediction.h b/Src/h264dec/lcommon/inc/mv_prediction.h new file mode 100644 index 00000000..0f2a13e2 --- /dev/null +++ b/Src/h264dec/lcommon/inc/mv_prediction.h @@ -0,0 +1,19 @@ +/*! + ************************************************************************************* + * \file mv_prediction.h + * + * \brief + * Declarations for Motion Vector Prediction + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + ************************************************************************************* + */ + +#ifndef _MV_PREDICTION_H_ +#define _MV_PREDICTION_H_ + +extern void init_motion_vector_prediction(Macroblock *currMB, int MbaffFrameFlag); + +#endif diff --git a/Src/h264dec/lcommon/inc/nalucommon.h b/Src/h264dec/lcommon/inc/nalucommon.h new file mode 100644 index 00000000..f0288ac5 --- /dev/null +++ b/Src/h264dec/lcommon/inc/nalucommon.h @@ -0,0 +1,64 @@ + +/*! + ************************************************************************************** + * \file + * nalucommon.h + * \brief + * NALU handling common to encoder and decoder + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + * - Karsten Suehring <suehring@hhi.de> + *************************************************************************************** + */ + +#ifndef _NALUCOMMON_H_ +#define _NALUCOMMON_H_ + +#define MAXRBSPSIZE 64000 +#define MAXNALUSIZE 64000 + +//! values for nal_unit_type +typedef enum { + NALU_TYPE_SLICE = 1, + NALU_TYPE_DPA = 2, + NALU_TYPE_DPB = 3, + NALU_TYPE_DPC = 4, + NALU_TYPE_IDR = 5, + NALU_TYPE_SEI = 6, + NALU_TYPE_SPS = 7, + NALU_TYPE_PPS = 8, + NALU_TYPE_AUD = 9, + NALU_TYPE_EOSEQ = 10, + NALU_TYPE_EOSTREAM = 11, + NALU_TYPE_FILL = 12 +} NaluType; + +//! 
values for nal_ref_idc +typedef enum { + NALU_PRIORITY_HIGHEST = 3, + NALU_PRIORITY_HIGH = 2, + NALU_PRIORITY_LOW = 1, + NALU_PRIORITY_DISPOSABLE = 0 +} NalRefIdc; + +//! NAL unit structure +typedef struct nalu_t +{ + int startcodeprefix_len; //!< 4 for parameter sets and first slice in picture, 3 for everything else (suggested) + unsigned len; //!< Length of the NAL unit (Excluding the start code, which does not belong to the NALU) + unsigned max_size; //!< NAL Unit Buffer size + int forbidden_bit; //!< should be always FALSE + NaluType nal_unit_type; //!< NALU_TYPE_xxxx + NalRefIdc nal_reference_idc; //!< NALU_PRIORITY_xxxx + byte *buf; //!< contains the first byte followed by the EBSP + uint16 lost_packets; //!< true, if packet loss is detected +} NALU_t; + +//! allocate one NAL Unit +extern NALU_t *AllocNALU(int); + +//! free one NAL Unit +extern void FreeNALU(NALU_t *n); + +#endif diff --git a/Src/h264dec/lcommon/inc/quant_params.h b/Src/h264dec/lcommon/inc/quant_params.h new file mode 100644 index 00000000..c35682c0 --- /dev/null +++ b/Src/h264dec/lcommon/inc/quant_params.h @@ -0,0 +1,55 @@ +/*! 
+ *************************************************************************** + * \file + * quant_params.h + * + * \author + * Alexis Michael Tourapis + * + * \brief + * Headerfile for Quantization parameters + ************************************************************************** + */ + +#ifndef _QUANT_PARAMS_H_ +#define _QUANT_PARAMS_H_ + +struct level_quant_params { + int OffsetComp; + int ScaleComp; + int InvScaleComp; +}; + +typedef struct level_quant_params LevelQuantParams; + +struct quant_params { + int AdaptRndWeight; + int AdaptRndCrWeight; + + LevelQuantParams *****q_params_4x4; + LevelQuantParams *****q_params_8x8; + + int *qp_per_matrix; + int *qp_rem_matrix; + + short **OffsetList4x4input; + short **OffsetList8x8input; + short ***OffsetList4x4; + short ***OffsetList8x8; +}; + +struct quant_methods { + int block_y; + int block_x; + int qp; + int* ACLevel; + int* ACRun; + int **fadjust; + LevelQuantParams **q_params; + int *coeff_cost; + const byte (*pos_scan)[2]; + const byte *c_cost; +}; + +#endif + diff --git a/Src/h264dec/lcommon/inc/transform.h b/Src/h264dec/lcommon/inc/transform.h new file mode 100644 index 00000000..61942004 --- /dev/null +++ b/Src/h264dec/lcommon/inc/transform.h @@ -0,0 +1,27 @@ + +/*! 
+ *************************************************************************** + * + * \file transform.h + * + * \brief + * prototypes of transform functions + * + * \date + * 10 July 2007 + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * Alexis Michael Tourapis + **************************************************************************/ + +#ifndef _TRANSFORM_H_ +#define _TRANSFORM_H_ + +#include "global.h" + +extern void forward4x4 (int **block , int **tblock, int pos_y, int pos_x); +extern void ihadamard4x4 (int block[4][4]); +extern void ihadamard2x2 (int block[4], int tblock[4]); + +#endif //_TRANSFORM_H_ diff --git a/Src/h264dec/lcommon/inc/typedefs.h b/Src/h264dec/lcommon/inc/typedefs.h new file mode 100644 index 00000000..58806bef --- /dev/null +++ b/Src/h264dec/lcommon/inc/typedefs.h @@ -0,0 +1,71 @@ +/*! + ************************************************************************************* + * \file typedefs.h + * + * \brief + * Common type definitions + * Currently only supports Windows and Linux operating systems. 
+ * Need to add support for other "older" systems such as VAX, DECC, Unix Alpha etc + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + ************************************************************************************* + */ + +#ifndef _TYPEDEFS_H_ +#define _TYPEDEFS_H_ + +#include "win32.h" + +typedef unsigned char byte; //!< byte type definition +typedef unsigned char uint8; //!< type definition for unsigned char (same as byte, 8 bits) +typedef unsigned short uint16; //!< type definition for unsigned short (16 bits) +typedef unsigned int uint32; //!< type definition for unsigned int (32 bits) + +typedef char int8; //!< NOTE(review): plain char has implementation-defined signedness; confirm whether signed char is intended +typedef short int16; //!< signed counterpart of uint16 +typedef int int32; //!< signed counterpart of uint32 + +#if (IMGTYPE == 0) +typedef byte imgpel; +typedef uint16 distpel; +typedef int32 distblk; +#elif (IMGTYPE == 2) +typedef float imgpel; +typedef float distpel; +typedef float distblk; +#else +typedef uint16 imgpel; +typedef uint32 distpel; +typedef int64 distblk; +#endif + +//! Boolean Type (falls back to plain int when FALSE is already defined as a macro) +#ifdef FALSE +# define Boolean int +#else +typedef enum { + FALSE, + TRUE +} Boolean; +#endif + +/* +#define MAXUINT8 0xff +#define MAXUINT16 0xffff +#define MAXUINT32 0xffffffff +#define MAXUINT64 0xffffffffffffffff + +#define MAXINT8 0x7f +#define MININT8 (-MAXINT8) +#define MAXINT16 0x7fff +#define MININT16 (-MAXINT16) +#define MAXINT32 0x7fffffff +#define MININT32 (-MAXINT32) +#define MAXINT64 0x7fffffffffffffff +#define MININT64 (-MAXINT64) +*/ + +#endif + diff --git a/Src/h264dec/lcommon/inc/types.h b/Src/h264dec/lcommon/inc/types.h new file mode 100644 index 00000000..6088f8e4 --- /dev/null +++ b/Src/h264dec/lcommon/inc/types.h @@ -0,0 +1,204 @@ +/*! + ************************************************************************ + * \file + * types.h + * + * \brief + * type definitions. 
+ * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * + ************************************************************************ + */ +#ifndef _TYPES_H_ +#define _TYPES_H_ + +/*********************************************************************** + * T y p e d e f i n i t i o n s f o r T M L + *********************************************************************** + */ + +typedef enum +{ + // YUV + PLANE_Y = 0, // PLANE_Y + PLANE_U = 1, // PLANE_Cb + PLANE_V = 2, // PLANE_Cr + // RGB + PLANE_G = 0, + PLANE_B = 1, + PLANE_R = 2 +} ColorPlane; + +enum { + LIST_0 = 0, + LIST_1 = 1, + BI_PRED = 2, + BI_PRED_L0 = 3, + BI_PRED_L1 = 4 +}; + +enum { + ERROR_SAD = 0, + ERROR_SSE = 1, + ERROR_SATD = 2, + ERROR_PSATD = 3 +}; + +enum { + ME_Y_ONLY = 0, + ME_YUV_FP = 1, + ME_YUV_FP_SP = 2 +}; + + +enum { + DISTORTION_MSE = 0 +}; + + +//! Data Partitioning Modes +typedef enum +{ + PAR_DP_1, //!< no data partitioning is supported + PAR_DP_3 //!< data partitioning with 3 partitions +} PAR_DP_TYPE; + + +//! Output File Types +typedef enum +{ + PAR_OF_ANNEXB, //!< Annex B byte stream format + PAR_OF_RTP, //!< RTP packets in outfile + PAR_OF_MEMORY, //!< +} PAR_OF_TYPE; + +//! Field Coding Types +typedef enum +{ + FRAME_CODING, + FIELD_CODING, + ADAPTIVE_CODING, + FRAME_MB_PAIR_CODING +} CodingType; + +//! 
definition of H.264 syntax elements +typedef enum +{ + SE_HEADER, + SE_PTYPE, + SE_MBTYPE, + SE_REFFRAME, + SE_INTRAPREDMODE, + SE_MVD, + SE_CBP, + SE_LUM_DC_INTRA, + SE_CHR_DC_INTRA, + SE_LUM_AC_INTRA, + SE_CHR_AC_INTRA, + SE_LUM_DC_INTER, + SE_CHR_DC_INTER, + SE_LUM_AC_INTER, + SE_CHR_AC_INTER, + SE_DELTA_QUANT, + SE_BFRAME, + SE_EOS, + SE_MAX_ELEMENTS = 20 //!< number of maximum syntax elements +} SE_type; // substituting the definitions in elements.h + + +typedef enum +{ + NO_SLICES, + FIXED_MB, + FIXED_RATE, + CALL_BACK +} SliceMode; + + +typedef enum +{ + CAVLC, + CABAC +} SymbolMode; + +typedef enum +{ + FULL_SEARCH = -1, + FAST_FULL_SEARCH = 0, + UM_HEX = 1, + UM_HEX_SIMPLE = 2, + EPZS = 3 +} SearchType; + + +typedef enum +{ + FRAME, + TOP_FIELD, + BOTTOM_FIELD +} PictureStructure; //!< New enum for field processing + +typedef enum +{ + P_SLICE = 0, + B_SLICE = 1, + I_SLICE = 2, + SP_SLICE = 3, + SI_SLICE = 4, + NUM_SLICE_TYPES = 5 +} SliceType; + +//Motion Estimation levels +typedef enum +{ + F_PEL, //!< Full Pel refinement + H_PEL, //!< Half Pel refinement + Q_PEL //!< Quarter Pel refinement +} MELevel; + +typedef enum +{ + FAST_ACCESS = 0, //!< Fast/safe reference access + UMV_ACCESS = 1 //!< unconstrained reference access +} REF_ACCESS_TYPE; + +typedef enum +{ + IS_LUMA = 0, + IS_CHROMA = 1 +} Component_Type; + +typedef enum +{ + RC_MODE_0 = 0, + RC_MODE_1 = 1, + RC_MODE_2 = 2, + RC_MODE_3 = 3 +} RCModeType; + + +typedef enum { + SSE = 0, + SSE_RGB = 1, + PSNR = 2, + PSNR_RGB = 3, + SSIM = 4, + SSIM_RGB = 5, + MS_SSIM = 6, + MS_SSIM_RGB = 7, + TOTAL_DIST_TYPES = 8 +} distortion_types; + +typedef enum { + WP_MCPREC_PLUS0 = 4, + WP_MCPREC_PLUS1 = 5, + WP_MCPREC_MINUS0 = 6, + WP_MCPREC_MINUS1 = 7, + WP_MCPREC_MINUS_PLUS0 = 8, + WP_REGULAR = 9 +} weighted_prediction_types; + + +#endif diff --git a/Src/h264dec/lcommon/inc/win32.h b/Src/h264dec/lcommon/inc/win32.h new file mode 100644 index 00000000..09ffef61 --- /dev/null +++ 
b/Src/h264dec/lcommon/inc/win32.h @@ -0,0 +1,92 @@ + +/*! + ************************************************************************ + * \file + * win32.h + * + * \brief + * win32 definitions for H.264 encoder. + * + * \author + * + ************************************************************************ + */ +#ifndef _H264_WIN32_H_ +#define _H264_WIN32_H_ +#pragma once + +# include <fcntl.h> +# include <stdlib.h> +# include <stdio.h> +# include <string.h> +# include <assert.h> + +#if defined(WIN32) +# include <io.h> +# include <sys/types.h> +# include <sys/stat.h> +# include <windows.h> +#ifndef strcasecmp +# define strcasecmp _strcmpi +#endif + +# define snprintf _snprintf +# define open _open +# define close _close +# define read _read +# define write _write +#ifndef lseek +# define lseek _lseeki64 +#endif +# define fsync _commit +# define tell _telli64 +# define TIMEB _timeb +# define TIME_T LARGE_INTEGER +# define OPENFLAGS_WRITE _O_WRONLY|_O_CREAT|_O_BINARY|_O_TRUNC +# define OPEN_PERMISSIONS _S_IREAD | _S_IWRITE +# define OPENFLAGS_READ _O_RDONLY|_O_BINARY +# define inline _inline +# define forceinline __forceinline +#else +# include <unistd.h> +# include <sys/time.h> +# include <sys/stat.h> +# include <time.h> + +# define TIMEB timeb +# define TIME_T struct timeval +# define tell(fd) lseek(fd, 0, SEEK_CUR) +# define OPENFLAGS_WRITE O_WRONLY|O_CREAT|O_TRUNC +# define OPENFLAGS_READ O_RDONLY +# define OPEN_PERMISSIONS S_IRUSR | S_IWUSR + +# if __STDC_VERSION__ >= 199901L + /* "inline" is a keyword */ +# else +# define inline /* nothing */ +# endif +# define forceinline inline +#endif + +#if defined(WIN32) && !defined(__GNUC__) +typedef __int64 int64; +typedef unsigned __int64 uint64; +# define FORMAT_OFF_T "I64d" +# ifndef INT64_MIN +# define INT64_MIN (-9223372036854775807i64 - 1i64) +# endif +#else + +typedef long long int64; +typedef unsigned long long uint64; +# define FORMAT_OFF_T "lld" +# ifndef INT64_MIN +# define INT64_MIN (-9223372036854775807LL - 
1LL) +# endif +#endif + +void gettime(TIME_T* time); +int64 timediff(TIME_T* start, TIME_T* end); +int64 timenorm(int64 cur_time); + +#endif diff --git a/Src/h264dec/lcommon/src/img_io.c b/Src/h264dec/lcommon/src/img_io.c new file mode 100644 index 00000000..c0520218 --- /dev/null +++ b/Src/h264dec/lcommon/src/img_io.c @@ -0,0 +1,327 @@ + +/*! + ************************************************************************************* + * \file img_io.c + * + * \brief + * image I/O related functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + ************************************************************************************* + */ +#include "contributors.h" +#include "global.h" +#include "img_io.h" +#include "report.h" + +static const VIDEO_SIZE VideoRes[] = { + { "qcif" , 176, 144}, + { "qqvga" , 160, 128}, + { "qvga" , 320, 240}, + { "sif" , 352, 240}, + { "cif" , 352, 288}, + { "vga" , 640, 480}, + { "sd1" , 720, 480}, + { "sd2" , 704, 576}, + { "sd3" , 720, 576}, + { "720p" , 1280, 720}, + { "1080p" , 1920, 1080}, + { NULL, 0, 0} +}; + +/*! 
+ ************************************************************************ + * \brief + * Parse Size from from file name + * + ************************************************************************ + */ +int ParseSizeFromString (VideoDataFile *input_file, int *x_size, int *y_size, double *fps) +{ + char *p1, *p2, *tail; + char *fn = input_file->fname; + char c; + int i = 0; + + *x_size = *y_size = -1; + p1 = p2 = fn; + while (p1 != NULL && p2 != NULL) + { + // Search for first '_' + p1 = strstr( p1, "_"); + if (p1 == NULL) + break; + + // Search for end character of x_size (first 'x' after last '_') + p2 = strstr( p1, "x"); + + // If no 'x' is found, exit + if (p2 == NULL) + break; + + // Try conversion of number + *p2 = 0; + *x_size = strtol( p1 + 1, &tail, 10); + + // If there are characters left in the string, or the string is null, discard conversion + if (*tail != '\0' || *(p1 + 1) == '\0') + { + *p2 = 'x'; + p1 = tail; + continue; + } + + // Conversion was correct. Restore string + *p2 = 'x'; + + // Search for end character of y_size (first '_' or '.' after last 'x') + p1 = strpbrk( p2 + 1, "_."); + // If no '_' or '.' is found, try again from current position + if (p1 == NULL) + { + p1 = p2 + 1; + continue; + } + + // Try conversion of number + c = *p1; + *p1 = 0; + *y_size = strtol( p2 + 1, &tail, 10); + + // If there are characters left in the string, or the string is null, discard conversion + if (*tail != '\0' || *(p2 + 1) == '\0') + { + *p1 = c; + p1 = tail; + continue; + } + + // Conversion was correct. 
Restore string + *p1 = c; + + // Search for end character of y_size (first 'i' or 'p' after last '_') + p2 = strstr( p1 + 1, "ip"); + + // If no 'i' or 'p' is found, exit + if (p2 == NULL) + break; + + // Try conversion of number + c = *p2; + *p2 = 0; + *fps = strtod( p1 + 1, &tail); + + // If there are characters left in the string, or the string is null, discard conversion + if (*tail != '\0' || *(p1 + 1) == '\0') + { + *p2 = c; + p1 = tail; + continue; + } + + // Conversion was correct. Restore string + *p2 = c; + break; + } + + // Now lets test some common video file formats + if (p1 == NULL || p2 == NULL) + { + for (i = 0; VideoRes[i].name != NULL; i++) + { + if (strcasecmp (fn, VideoRes[i].name)) + { + *x_size = VideoRes[i].x_size; + *y_size = VideoRes[i].y_size; + // Should add frame rate support as well + break; + } + } + } + + return (*x_size == -1 || *y_size == -1) ? 0 : 1; +} + +/*! + ************************************************************************ + * \brief + * Parse Size from from file name + * + ************************************************************************ + */ +void ParseFrameNoFormatFromString (VideoDataFile *input_file) +{ + char *p1, *p2, *tail; + char *fn = input_file->fname; + char *fhead = input_file->fhead; + char *ftail = input_file->ftail; + int *zero_pad = &input_file->zero_pad; + int *num_digits = &input_file->num_digits; + + *zero_pad = 0; + *num_digits = -1; + p1 = p2 = fn; + while (p1 != NULL && p2 != NULL) + { + // Search for first '_' + p1 = strstr( p1, "%"); + if (p1 == NULL) + break; + + strncpy(fhead, fn, p1 - fn); + + // Search for end character of x_size (first 'x' after last '_') + p2 = strstr( p1, "d"); + + // If no 'x' is found, exit + if (p2 == NULL) + break; + + // Try conversion of number + *p2 = 0; + + if (*(p1 + 1) == '0') + *zero_pad = 1; + + *num_digits = strtol( p1 + 1, &tail, 10); + + // If there are characters left in the string, or the string is null, discard conversion + if (*tail != '\0' || 
*(p1 + 1) == '\0') + { + *p2 = 'd'; + p1 = tail; + continue; + } + + // Conversion was correct. Restore string + *p2 = 'd'; + + tail++; + strncpy(ftail, tail, strlen(tail)); + break; + } + + if (input_file->vdtype == VIDEO_TIFF) + { + input_file->is_concatenated = 0; + } + else + input_file->is_concatenated = (*num_digits == -1) ? 1 : 0; +} + +/*! + ************************************************************************ + * \brief + * Open file containing a single frame + ************************************************************************ + */ +void OpenFrameFile( VideoDataFile *input_file, int FrameNumberInFile) +{ + char infile [FILE_NAME_SIZE], in_number[16]; + int length = 0; + in_number[length]='\0'; + length = strlen(input_file->fhead); + strncpy(infile, input_file->fhead, length); + infile[length]='\0'; + if (input_file->zero_pad) + snprintf(in_number, 16, "%0*d", input_file->num_digits, FrameNumberInFile); + else + snprintf(in_number, 16, "%*d", input_file->num_digits, FrameNumberInFile); + + strncat(infile, in_number, sizeof(in_number)); + length += sizeof(in_number); + infile[length]='\0'; + strncat(infile, input_file->ftail, strlen(input_file->ftail)); + length += strlen(input_file->ftail); + infile[length]='\0'; + + if ((input_file->f_num = open(infile, OPENFLAGS_READ)) == -1) + { + printf ("OpenFrameFile: cannot open file %s\n", infile); + report_stats_on_error(); + } +} + +/*! + ************************************************************************ + * \brief + * Open file(s) containing the entire frame sequence + ************************************************************************ + */ +void OpenFiles( VideoDataFile *input_file) +{ + if (input_file->is_concatenated == 1) + { + if (strlen(input_file->fname) == 0) + { + snprintf(errortext, ET_SIZE, "No input sequence name was provided. 
Please check settings."); + error (errortext, 500); + } + + if ((input_file->f_num = open(input_file->fname, OPENFLAGS_READ)) == -1) + { + snprintf(errortext, ET_SIZE, "Input file %s does not exist",input_file->fname); + error (errortext, 500); + } + } +} + +/*! + ************************************************************************ + * \brief + * Close input file + ************************************************************************ + */ +void CloseFiles(VideoDataFile *input_file) +{ + if (input_file->f_num != -1) + close(input_file->f_num); + input_file->f_num = -1; +} + +/* ========================================================================== + * + * ParseVideoType + * + * ========================================================================== +*/ +VideoFileType ParseVideoType (VideoDataFile *input_file) +{ + char *format; + + format = input_file->fname + strlen(input_file->fname) - 3; + + if (strcasecmp (format, "yuv") == 0) + { + input_file->vdtype = VIDEO_YUV; + input_file->format.yuv_format = YUV420; + input_file->avi = NULL; + } + else if (strcasecmp (format, "rgb") == 0) + { + input_file->vdtype = VIDEO_RGB; + input_file->format.yuv_format = YUV444; + input_file->avi = NULL; + } + else if (strcasecmp (format, "tif") == 0) + { + input_file->vdtype = VIDEO_TIFF; + input_file->avi = NULL; + } + else if (strcasecmp (format, "avi") == 0) + { + input_file->vdtype = VIDEO_AVI; + } + else + { + //snprintf(errortext, ET_SIZE, "ERROR: video file format not supported"); + //error (errortext, 500); + input_file->vdtype = VIDEO_YUV; + input_file->format.yuv_format = YUV420; + input_file->avi = NULL; + } + + return input_file->vdtype; +} diff --git a/Src/h264dec/lcommon/src/memalloc.c b/Src/h264dec/lcommon/src/memalloc.c new file mode 100644 index 00000000..da5872ed --- /dev/null +++ b/Src/h264dec/lcommon/src/memalloc.c @@ -0,0 +1,1280 @@ + +/*! 
+ ************************************************************************ + * \file memalloc.c + * + * \brief + * Memory allocation and free helper functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + * - Karsten Sühring <suehring@hhi.de> + * + ************************************************************************ + */ + +#include "global.h" +#include "memalloc.h" +#include "mbuffer.h" + +#define ROUNDUP16(size) (((size)+15) & ~15) + +#if !defined(USEMMX) + /*! + ************************************************************************ + * \brief + * Initialize 2-dimensional top and bottom field to point to the proper + * lines in frame + * + * \par Output: + * memory size in bytes + ************************************************************************/ +int init_top_bot_planes(imgpel **imgFrame, int dim0, imgpel ***imgTopField, imgpel ***imgBotField) +{ + int i; + + if((*imgTopField = (imgpel**) malloc((dim0>>1) * sizeof(imgpel*))) == NULL) + no_mem_exit("init_top_bot_planes: imgTopField"); + + if((*imgBotField = (imgpel**) malloc((dim0>>1) * sizeof(imgpel*))) == NULL) + no_mem_exit("init_top_bot_planes: imgBotField"); + + for(i = 0; i < (dim0>>1); i++) + { + (*imgTopField)[i] = imgFrame[2 * i ]; + (*imgBotField)[i] = imgFrame[2 * i + 1]; + } + + return dim0 * sizeof(imgpel*); +} + + /*! + ************************************************************************ + * \brief + * free 2-dimensional top and bottom fields without freeing target memory + * + * \par Output: + * memory size in bytes + ************************************************************************/ +void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField) +{ + free (imgTopField); + free (imgBotField); +} + + +/*! 
+ ************************************************************************ + * \brief + * Allocate 1D memory array -> imgpel array1D[dim0 + * + * \par Output: + * memory size in bytes + ************************************************************************/ +int get_mem1Dpel(imgpel **array1D, int dim0) +{ + if((*array1D = (imgpel*)calloc(dim0, sizeof(imgpel))) == NULL) + no_mem_exit("get_mem1Dpel: arra12D"); + + return (sizeof(imgpel*) + dim0 * sizeof(imgpel)); +} + +/*! + ************************************************************************ + * \brief + * Allocate 2D memory array -> imgpel array2D[dim0][dim1] + * + * \par Output: + * memory size in bytes + ************************************************************************/ +int get_mem2Dpel(imgpel ***array2D, int dim0, int dim1) +{ + int i; + + if((*array2D = (imgpel**)malloc(dim0 * sizeof(imgpel*))) == NULL) + no_mem_exit("get_mem2Dpel: array2D"); + if((*(*array2D) = (imgpel* )calloc(dim0 * dim1,sizeof(imgpel ))) == NULL) + no_mem_exit("get_mem2Dpel: array2D"); + + for(i = 1 ; i < dim0; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + + return dim0 * (sizeof(imgpel*) + dim1 * sizeof(imgpel)); +} + +VideoImage *get_memImage(int width, int height) +{ + int i, stride; + VideoImage *image = (VideoImage *)calloc(1, sizeof(VideoImage)); + +#ifdef H264_IPP + + IppiSize roi = {width, height}; + if (!image) + return 0; + if((image->img = (imgpel**)malloc(height * sizeof(imgpel*))) == NULL) + return 0; + + image->base_address = (imgpel* )ippiMalloc_8u_C1(width, height+1, &stride); // height+1 so we can deal with overreading + if (!image->base_address) + return 0; + + image->stride=stride; + + for(i = 0 ; i < height; i++) + image->img[i] = image->base_address + stride*i; + + image->next = 0; + + return image; +#else + if (!image) + return 0; + stride = ROUNDUP16(width); + image->stride = stride; + + if((image->img = (imgpel**)malloc(height * sizeof(imgpel*))) == NULL) + return 0; + if((image->base_address = 
(imgpel* )malloc(stride * height* sizeof(imgpel))) == NULL) + return 0; + memset(image->base_address, 0, stride * height* sizeof(imgpel)); + + for(i = 0 ; i < height; i++) + image->img[i] = image->base_address + stride*i; + + return image; +#endif +} + +void free_memImage(VideoImage *image) +{ + free(image->img); + #ifdef H264_IPP + ippiFree(image->base_address); + #else + free(image->base_address); + #endif + free(image); +} + +/*! + ************************************************************************ + * \brief + * Allocate 3D memory array -> imgpel array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem3Dpel(imgpel ****array3D, int dim0, int dim1, int dim2) +{ + int i, mem_size = dim0 * sizeof(imgpel**); + + if(((*array3D) = (imgpel***)malloc(dim0 * sizeof(imgpel**))) == NULL) + no_mem_exit("get_mem3Dpel: array3D"); + + mem_size += get_mem2Dpel(*array3D, dim0 * dim1, dim2); + + for(i = 1; i < dim0; i++) + (*array3D)[i] = (*array3D)[i - 1] + dim1; + + return mem_size; +} +/*! + ************************************************************************ + * \brief + * Allocate 4D memory array -> imgpel array4D[dim0][dim1][dim2][dim3] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem4Dpel(imgpel *****array4D, int dim0, int dim1, int dim2, int dim3) +{ + int i, mem_size = dim0 * sizeof(imgpel***); + + if(((*array4D) = (imgpel****)malloc(dim0 * sizeof(imgpel***))) == NULL) + no_mem_exit("get_mem4Dpel: array4D"); + + mem_size += get_mem3Dpel(*array4D, dim0 * dim1, dim2, dim3); + + for(i = 1; i < dim0; i++) + (*array4D)[i] = (*array4D)[i - 1] + dim1; + + return mem_size; +} + + + + +/*! 
+ ************************************************************************ + * \brief + * free 1D memory array + * which was allocated with get_mem1Dpel() + ************************************************************************ + */ +void free_mem1Dpel(imgpel *array1D) +{ + if (array1D) + { + free (array1D); + } + else + { + error ("free_mem1Dpel: trying to free unused memory",100); + } +} + +/*! + ************************************************************************ + * \brief + * free 2D memory array + * which was allocated with get_mem2Dpel() + ************************************************************************ + */ +void free_mem2Dpel(imgpel **array2D) +{ + if (array2D) + { + if (*array2D) + free (*array2D); + else + error ("free_mem2Dpel: trying to free unused memory",100); + + free (array2D); + } + else + { + error ("free_mem2Dpel: trying to free unused memory",100); + } +} + + +/*! + ************************************************************************ + * \brief + * free 3D memory array + * which was allocated with get_mem3Dpel() + ************************************************************************ + */ +void free_mem3Dpel(imgpel ***array3D) +{ + if (array3D) + { + free_mem2Dpel(*array3D); + free (array3D); + } + else + { + error ("free_mem3Dpel: trying to free unused memory",100); + } +} +/*! + ************************************************************************ + * \brief + * free 4D memory array + * which was allocated with get_mem4Dpel() + ************************************************************************ + */ +void free_mem4Dpel(imgpel ****array4D) +{ + if (array4D) + { + free_mem3Dpel(*array4D); + free (array4D); + } + else + { + error ("free_mem4Dpel: trying to free unused memory",100); + } +} +/*! 
+ ************************************************************************ + * \brief + * free 5D memory array + * which was allocated with get_mem5Dpel() + ************************************************************************ + */ +void free_mem5Dpel(imgpel *****array5D) +{ + if (array5D) + { + free_mem4Dpel(*array5D); + free (array5D); + } + else + { + error ("free_mem5Dpel: trying to free unused memory",100); + } +} + +/*! + ************************************************************************ + * \brief + * Allocate 2D memory array -> unsigned char array2D[dim0][dim1] + * + * \par Output: + * memory size in bytes + ************************************************************************/ +int get_mem2D(byte ***array2D, int dim0, int dim1) +{ + int i; + + if(( *array2D = (byte**)malloc(dim0 * sizeof(byte*))) == NULL) + no_mem_exit("get_mem2D: array2D"); + if((*(*array2D) = (byte* )calloc(dim0 * dim1,sizeof(byte ))) == NULL) + no_mem_exit("get_mem2D: array2D"); + + for(i = 1; i < dim0; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + + return dim0 * (sizeof(byte*) + dim1 * sizeof(byte)); +} + +/*! + ************************************************************************ + * \brief + * Allocate 2D memory array -> int array2D[dim0][dim1] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem2Dint(int ***array2D, int dim0, int dim1) +{ + int i; + + if((*array2D = (int**)malloc(dim0 * sizeof(int*))) == NULL) + no_mem_exit("get_mem2Dint: array2D"); + if((*(*array2D) = (int* )calloc(dim0 * dim1, sizeof(int ))) == NULL) + no_mem_exit("get_mem2Dint: array2D"); + + for(i = 1 ; i < dim0; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + + return dim0 * (sizeof(int*) + dim1 * sizeof(int)); +} + +/*! 
+ ************************************************************************ + * \brief + * Allocate 2D memory array -> int64 array2D[dim0][dim1] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +static int get_mem2Dref(h264_ref_t ***array2D, int dim0, int dim1) +{ + int i; + size_t malloc_size = ROUNDUP16(dim0 * dim1*sizeof(h264_ref_t)); + if((*array2D = (h264_ref_t**)malloc(dim0 * sizeof(h264_ref_t*))) == NULL) + no_mem_exit("get_mem2Dint64: array2D"); + if((*(*array2D) = (h264_ref_t* )_aligned_malloc(malloc_size, 32)) == NULL) + no_mem_exit("get_mem2Dint64: array2D"); + memset((*array2D)[0], 0, malloc_size); + + for(i = 1; i < dim0; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + + return dim0 * (sizeof(h264_ref_t*) + dim1 * sizeof(h264_ref_t)); +} + +int get_mem2DPicMotion(PicMotion ***array2D, int dim0, int dim1) +{ + // we allocate with one extra position in the first dimension + // so the motion_cache can use it as a next pointer + int i; + size_t malloc_size = ROUNDUP16(dim0 * dim1*sizeof(PicMotion)); + if((*array2D = (PicMotion**)malloc((dim0+1) * sizeof(PicMotion*))) == NULL) + no_mem_exit("get_mem2Dint64: array2D"); + if((*(*array2D) = (PicMotion* )_aligned_malloc(malloc_size, 32)) == NULL) + no_mem_exit("get_mem2Dint64: array2D"); + memset((*array2D)[0], 0, malloc_size); + + for(i = 1; i < dim0; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + (*array2D)[dim0] = 0; + + return dim0 * (sizeof(PicMotion*) + dim1 * sizeof(PicMotion)); +} + +/*! 
+ ************************************************************************ + * \brief + * Allocate 3D memory array -> unsigned char array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem3D(byte ****array3D, int dim0, int dim1, int dim2) +{ + int i, mem_size = dim0 * sizeof(byte**); + + if(((*array3D) = (byte***)malloc(dim0 * sizeof(byte**))) == NULL) + no_mem_exit("get_mem3D: array3D"); + + mem_size += get_mem2D(*array3D, dim0 * dim1, dim2); + + for(i = 1; i < dim0; i++) + (*array3D)[i] = (*array3D)[i-1] + dim1; + + return mem_size; +} + +/*! + ************************************************************************ + * \brief + * Allocate 4D memory array -> unsigned char array4D[dim0][dim1][dim2][dim3] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem4D(byte *****array4D, int dim0, int dim1, int dim2, int dim3) +{ + int i, mem_size = dim0 * sizeof(byte***); + + if(((*array4D) = (byte****)malloc(dim0 * sizeof(byte***))) == NULL) + no_mem_exit("get_mem4D: array4D"); + + mem_size += get_mem3D(*array4D, dim0 * dim1, dim2, dim3); + + for(i = 1; i < dim0; i++) + (*array4D)[i] = (*array4D)[i-1] + dim1; + + return mem_size; +} + +/*! + ************************************************************************ + * \brief + * Allocate 3D memory array -> int array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem3Dint(int ****array3D, int dim0, int dim1, int dim2) +{ + int i, mem_size = dim0 * sizeof(int**); + + if(((*array3D) = (int***)malloc(dim0 * sizeof(int**))) == NULL) + no_mem_exit("get_mem3Dint: array3D"); + + mem_size += get_mem2Dint(*array3D, dim0 * dim1, dim2); + + for(i = 1; i < dim0; i++) + (*array3D)[i] = (*array3D)[i-1] + dim1; + + return mem_size; +} + +/*! 
+ ************************************************************************ + * \brief + * Allocate 3D memory array -> int64 array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem3Dref(h264_ref_t ****array3D, int dim0, int dim1, int dim2) +{ + int i, mem_size = dim0 * sizeof(h264_ref_t**); + + if(((*array3D) = (h264_ref_t***)malloc(dim0 * sizeof(h264_ref_t**))) == NULL) + no_mem_exit("get_mem3Dint64: array3D"); + + mem_size += get_mem2Dref(*array3D, dim0 * dim1, dim2); + + for(i = 1; i < dim0; i++) + (*array3D)[i] = (*array3D)[i-1] + dim1; + + return mem_size; +} + +/*! + ************************************************************************ + * \brief + * Allocate 4D memory array -> int array4D[dim0][dim1][dim2][dim3] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem4Dint(int *****array4D, int dim0, int dim1, int dim2, int dim3) +{ + int i, mem_size = dim0 * sizeof(int***); + + if(((*array4D) = (int****)malloc(dim0 * sizeof(int***))) == NULL) + no_mem_exit("get_mem4Dint: array4D"); + + mem_size += get_mem3Dint(*array4D, dim0 * dim1, dim2, dim3); + + for(i = 1; i < dim0; i++) + (*array4D)[i] = (*array4D)[i-1] + dim1; + + return mem_size; +} + + + + +/*! + ************************************************************************ + * \brief + * free 2D memory array + * which was allocated with get_mem2D() + ************************************************************************ + */ +void free_mem2D(byte **array2D) +{ + if (array2D) + { + free (*array2D); + free (array2D); + } +} + +/*! 
+ ************************************************************************ + * \brief + * free 2D memory array + * which was allocated with get_mem2Dint() + ************************************************************************ + */ +void free_mem2Dint(int **array2D) +{ + if (array2D) + { + free (*array2D); + free (array2D); + } +} + +/*! + ************************************************************************ + * \brief + * free 2D memory array + * which was allocated with get_mem2Dint64() + ************************************************************************ + */ +void free_mem2Dref(h264_ref_t **array2D) +{ + if (array2D) + { + _aligned_free (*array2D); + free (array2D); + } +} + +void free_mem2DPicMotion(PicMotion **array2D) +{ + if (array2D) + { + _aligned_free (*array2D); + free (array2D); + } +} + +/*! + ************************************************************************ + * \brief + * free 3D memory array + * which was allocated with get_mem3D() + ************************************************************************ + */ +void free_mem3D(byte ***array3D) +{ + if (array3D) + { + free_mem2D(*array3D); + free (array3D); + } +} + +/*! + ************************************************************************ + * \brief + * free 4D memory array + * which was allocated with get_mem3D() + ************************************************************************ + */ +void free_mem4D(byte ****array4D) +{ + if (array4D) + { + free_mem3D(*array4D); + free (array4D); + } +} + +/*! + ************************************************************************ + * \brief + * free 3D memory array + * which was allocated with get_mem3Dint() + ************************************************************************ + */ +void free_mem3Dint(int ***array3D) +{ + if (array3D) + { + free_mem2Dint(*array3D); + free (array3D); + } +} + + +/*! 
+ ************************************************************************ + * \brief + * free 3D memory array + * which was allocated with get_mem3Dint64() + ************************************************************************ + */ +void free_mem3Dref(h264_ref_t ***array3D) +{ + if (array3D) + { + free_mem2Dref(*array3D); + free (array3D); + } +} + +void free_mem3DPicMotion(PicMotion ***array3D) +{ + if (array3D) + { + free_mem2DPicMotion(*array3D); + free (array3D); + } +} + +/*! + ************************************************************************ + * \brief + * free 4D memory array + * which was allocated with get_mem4Dint() + ************************************************************************ + */ +void free_mem4Dint(int ****array4D) +{ + if (array4D) + { + free_mem3Dint( *array4D); + free (array4D); + } +} + + +/*! + ************************************************************************ + * \brief + * Exit program if memory allocation failed (using error()) + * \param where + * string indicating which memory allocation failed + ************************************************************************ + */ +void no_mem_exit(char *where) +{ + snprintf(errortext, ET_SIZE, "Could not allocate memory: %s",where); + error (errortext, 100); +} + + +/*! 
+ ************************************************************************ + * \brief + * Allocate 2D uint16 memory array -> uint16 array2D[dim0][dim1] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem2Duint16(uint16 ***array2D, int dim0, int dim1) +{ + int i; + + if(( *array2D = (uint16**)malloc(dim0 * sizeof(uint16*))) == NULL) + no_mem_exit("get_mem2Duint16: array2D"); + + if((*(*array2D) = (uint16* )calloc(dim0 * dim1,sizeof(uint16 ))) == NULL) + no_mem_exit("get_mem2Duint16: array2D"); + + for(i = 1; i < dim0; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + + return dim0 * (sizeof(uint16*) + dim1 * sizeof(uint16)); +} + + +/*! + ************************************************************************ + * \brief + * Allocate 2D short memory array -> short array2D[dim0][dim1] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem2Dshort(short ***array2D, int dim0, int dim1) +{ + int i; + size_t malloc_size = ROUNDUP16(dim0 * dim1*sizeof(short)); + if(( *array2D = (short**)malloc(dim0 * sizeof(short*))) == NULL) + no_mem_exit("get_mem2Dshort: array2D"); + if((*(*array2D) = (short* )_aligned_malloc(malloc_size, 32)) == NULL) + no_mem_exit("get_mem2Dshort: array2D"); + memset((*array2D)[0], 0, malloc_size); + + for(i = 1; i < dim0; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + + return dim0 * (sizeof(short*) + dim1 * sizeof(short)); +} + +/*! 
+ ************************************************************************ + * \brief + * Allocate 3D memory short array -> short array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem3Dshort(short ****array3D,int dim0, int dim1, int dim2) +{ + int i, mem_size = dim0 * sizeof(short**); + + if(((*array3D) = (short***)malloc(dim0 * sizeof(short**))) == NULL) + no_mem_exit("get_mem3Dshort: array3D"); + + mem_size += get_mem2Dshort(*array3D, dim0 * dim1, dim2); + + for(i = 1; i < dim0; i++) + (*array3D)[i] = (*array3D)[i-1] + dim1; + + return mem_size; +} + + +/*! + ************************************************************************ + * \brief + * Allocate 4D memory short array -> short array3D[dim0][dim1][dim2][dim3] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +static MotionVector **get_mem2DMotionVector(int dim0, int dim1) +{ + MotionVector **array2D; + int i; + size_t malloc_size = ROUNDUP16(dim0 * dim1)*sizeof(MotionVector); + if((array2D = (MotionVector**)malloc(dim0 * sizeof(MotionVector*))) == NULL) + return 0; + + if((array2D[0] = (MotionVector* )_aligned_malloc(malloc_size, 32)) == NULL) + { + free(array2D); + return 0; + } + memset(array2D[0], 0, malloc_size); + + for(i = 1; i < dim0; i++) + array2D[i] = array2D[i-1] + dim1; + + return array2D; +} + +MotionVector ***get_mem3DMotionVector(int dim0, int dim1, int dim2) +{ + MotionVector ***array3D; + int i; + + if((array3D = (MotionVector***)malloc(dim0 * sizeof(MotionVector **))) == NULL) + return 0; + + array3D[0] = get_mem2DMotionVector(dim0 * dim1, dim2); + if (!array3D[0]) + { + free(array3D); + return 0; + } + + for(i = 1; i < dim0; i++) + array3D[i] = array3D[i-1] + dim1; + + return array3D; +} + + + +/*! 
+ ************************************************************************ + * \brief + * free 2D uint16 memory array + * which was allocated with get_mem2Duint16() + ************************************************************************ + */ +void free_mem2Duint16(uint16 **array2D) +{ + if (array2D) + { + free (*array2D); + free (array2D); + } + +} + +/*! + ************************************************************************ + * \brief + * free 2D short memory array + * which was allocated with get_mem2Dshort() + ************************************************************************ + */ +void free_mem2Dshort(short **array2D) +{ + if (array2D) + { + _aligned_free (*array2D); + free (array2D); + } +} + +/*! + ************************************************************************ + * \brief + * free 4D short memory array + * which was allocated with get_mem4Dshort() + ************************************************************************ + */ + +static void free_mem2DMotionVector(MotionVector **array2D) +{ + if (array2D) + { + _aligned_free(*array2D); + free (array2D); + } + +} + +void free_mem3DMotionVector(MotionVector ***array3D) +{ + if (array3D) + { + free_mem2DMotionVector( *array3D); + free (array3D); + } +} + + +/*! + ************************************************************************ + * \brief + * Allocate 2D memory array -> double array2D[dim0][dim1] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem2Ddouble(double ***array2D, int dim0, int dim1) +{ + int i; + + if((*array2D = (double**)malloc(dim0 * sizeof(double*))) == NULL) + no_mem_exit("get_mem2Ddouble: array2D"); + + if(((*array2D)[0] = (double* )calloc(dim0 * dim1,sizeof(double ))) == NULL) + no_mem_exit("get_mem2Ddouble: array2D"); + + for(i=1 ; i<dim0 ; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1 ; + + return dim0 * (sizeof(double*) + dim1 * sizeof(double)); +} + +/*! 
+ ************************************************************************ + * \brief + * Allocate 2D memory array -> double array2D[dim0][dim1] + * Note that array is shifted towards offset allowing negative values + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem2Dodouble(double ***array2D, int dim0, int dim1, int offset) +{ + int i; + + if((*array2D = (double**)malloc(dim0 * sizeof(double*))) == NULL) + no_mem_exit("get_mem2Dodouble: array2D"); + if(((*array2D)[0] = (double* )calloc(dim0 * dim1, sizeof(double ))) == NULL) + no_mem_exit("get_mem2Dodouble: array2D"); + + (*array2D)[0] += offset; + + for(i=1 ; i<dim0 ; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1 ; + + return dim0 * (sizeof(double*) + dim1 * sizeof(double)); +} + +/*! + ************************************************************************ + * \brief + * Allocate 3D memory double array -> double array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem3Dodouble(double ****array3D, int dim0, int dim1, int dim2, int offset) +{ + int i,j; + + if(((*array3D) = (double***)malloc(dim0 * sizeof(double**))) == NULL) + no_mem_exit("get_mem3Dodouble: array3D"); + + if(((*array3D)[0] = (double** )calloc(dim0 * dim1, sizeof(double*))) == NULL) + no_mem_exit("get_mem3Dodouble: array3D"); + + (*array3D) [0] += offset; + + for(i=1 ; i<dim0 ; i++) + (*array3D)[i] = (*array3D)[i-1] + dim1 ; + + for (i = 0; i < dim0; i++) + for (j = -offset; j < dim1 - offset; j++) + if(((*array3D)[i][j] = (double* )calloc(dim2, sizeof(double))) == NULL) + no_mem_exit("get_mem3Dodouble: array3D"); + + return dim0*( sizeof(double**) + dim1 * ( sizeof(double*) + dim2 * sizeof(double))); +} + + +/*! 
+ ************************************************************************ + * \brief + * Allocate 2D memory array -> int array2D[dim0][dim1] + * Note that array is shifted towards offset allowing negative values + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_offset_mem2Dshort(short ***array2D, int dim0, int dim1, int offset_y, int offset_x) +{ + int i; + + if((*array2D = (short**)malloc(dim0 * sizeof(short*))) == NULL) + no_mem_exit("get_offset_mem2Dshort: array2D"); + + if(((*array2D)[0] = (short* )calloc(dim0 * dim1, sizeof(short))) == NULL) + no_mem_exit("get_offset_mem2Dshort: array2D"); + (*array2D)[0] += offset_x + offset_y * dim1; + + for(i=-1 ; i > -offset_y - 1; i--) + { + (*array2D)[i] = (*array2D)[i+1] - dim1; + } + + for(i=1 ; i < dim1 - offset_y; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1; + + return dim0 * (sizeof(short*) + dim1 * sizeof(short)); +} + +/*! + ************************************************************************ + * \brief + * Allocate 3D memory int array -> int array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem3Doint(int ****array3D, int dim0, int dim1, int dim2, int offset) +{ + int i,j; + + if(((*array3D) = (int***)malloc(dim0 * sizeof(int**))) == NULL) + no_mem_exit("get_mem3Doint: array3D"); + + if(((*array3D)[0] = (int** )calloc(dim0 * dim1, sizeof(int*))) == NULL) + no_mem_exit("get_mem3Doint: array3D"); + + (*array3D) [0] += offset; + + for(i=1 ; i<dim0 ; i++) + (*array3D)[i] = (*array3D)[i-1] + dim1 ; + + for (i = 0; i < dim0; i++) + for (j = -offset; j < dim1 - offset; j++) + if(((*array3D)[i][j] = (int* )calloc(dim2, sizeof(int))) == NULL) + no_mem_exit("get_mem3Doint: array3D"); + + return dim0 * (sizeof(int**) + dim1 * (sizeof(int*) + dim2 * sizeof(int))); +} + +/*! 
+ ************************************************************************ + * \brief + * Allocate 2D memory array -> int array2D[dim0][dim1] + * Note that array is shifted towards offset allowing negative values + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +int get_mem2Doint(int ***array2D, int dim0, int dim1, int offset) +{ + int i; + + if((*array2D = (int**)malloc(dim0 * sizeof(int*))) == NULL) + no_mem_exit("get_mem2Dint: array2D"); + if(((*array2D)[0] = (int* )calloc(dim0 * dim1, sizeof(int))) == NULL) + no_mem_exit("get_mem2Dint: array2D"); + + (*array2D)[0] += offset; + + for(i=1 ; i<dim0 ; i++) + (*array2D)[i] = (*array2D)[i-1] + dim1 ; + + return dim0 * (sizeof(int*) + dim1 * sizeof(int)); +} + + +/*! + ************************************************************************ + * \brief + * Allocate 3D memory array -> int array3D[dim0][dim1][dim2] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ +// same change as in get_mem3Dint +int get_mem3Ddouble(double ****array3D, int dim0, int dim1, int dim2) +{ + int j, mem_size = dim0 * sizeof(double**); + + double **array2D; + + if(((*array3D) = (double***)malloc(dim0 * sizeof(double**))) == NULL) + no_mem_exit("get_mem3Ddouble: array3D"); + + mem_size += get_mem2Ddouble(&array2D, dim0 * dim1, dim2); + + for(j = 0; j < dim0; j++) + { + (*array3D)[j] = &array2D[j * dim1]; + } + + return mem_size; +} + +/*! 
+ ************************************************************************ + * \brief + * free 2D double memory array + * which was allocated with get_mem2Ddouble() + ************************************************************************ + */ +void free_mem2Ddouble(double **array2D) +{ + if (array2D) + { + if (*array2D) + free (*array2D); + else + error ("free_mem2Ddouble: trying to free unused memory",100); + + free (array2D); + + } + else + { + error ("free_mem2Ddouble: trying to free unused memory",100); + } +} + + +/*! +************************************************************************ +* \brief +* free 2D double memory array (with offset) +* which was allocated with get_mem2Ddouble() +************************************************************************ +*/ +void free_mem2Dodouble(double **array2D, int offset) +{ + if (array2D) + { + array2D[0] -= offset; + if (array2D[0]) + free (array2D[0]); + else error ("free_mem2Dodouble: trying to free unused memory",100); + + free (array2D); + + } else + { + error ("free_mem2Dodouble: trying to free unused memory",100); + } +} + +/*! + ************************************************************************ + * \brief + * free 3D memory array with offset + ************************************************************************ + */ +void free_mem3Dodouble(double ***array3D, int dim0, int dim1, int offset) +{ + int i, j; + + if (array3D) + { + for (i = 0; i < dim0; i++) + { + for (j = -offset; j < dim1 - offset; j++) + { + if (array3D[i][j]) + free(array3D[i][j]); + else + error ("free_mem3Dodouble: trying to free unused memory",100); + } + } + array3D[0] -= offset; + if (array3D[0]) + free(array3D[0]); + else + error ("free_mem3Dodouble: trying to free unused memory",100); + free (array3D); + } + else + { + error ("free_mem3Dodouble: trying to free unused memory",100); + } +} + +/*! 
+ ************************************************************************ + * \brief + * free 3D memory array with offset + ************************************************************************ + */ +void free_mem3Doint(int ***array3D, int dim0, int dim1, int offset) +{ + int i, j; + + if (array3D) + { + for (i = 0; i < dim0; i++) + { + for (j = -offset; j < dim1 - offset; j++) + { + if (array3D[i][j]) + free(array3D[i][j]); + else + error ("free_mem3Doint: trying to free unused memory",100); + } + } + array3D[0] -= offset; + if (array3D[0]) + free(array3D[0]); + else + error ("free_mem3Doint: trying to free unused memory",100); + free (array3D); + } + else + { + error ("free_mem3Doint: trying to free unused memory",100); + } +} + + +/*! +************************************************************************ +* \brief +* free 2D double memory array (with offset) +* which was allocated with get_mem2Ddouble() +************************************************************************ +*/ +void free_mem2Doint(int **array2D, int offset) +{ + if (array2D) + { + array2D[0] -= offset; + if (array2D[0]) + free (array2D[0]); + else + error ("free_mem2Doint: trying to free unused memory",100); + + free (array2D); + + } + else + { + error ("free_mem2Doint: trying to free unused memory",100); + } +} + +/*! +************************************************************************ +* \brief +* free 2D double memory array (with offset) +* which was allocated with get_mem2Ddouble() +************************************************************************ +*/ +void free_offset_mem2Dshort(short **array2D, int dim1, int offset_y, int offset_x) +{ + if (array2D) + { + array2D[0] -= offset_x + offset_y * dim1; + if (array2D[0]) + free (array2D[0]); + else + error ("free_offset_mem2Dshort: trying to free unused memory",100); + + free (array2D); + + } + else + { + error ("free_offset_mem2Dshort: trying to free unused memory",100); + } +} + +/*! 
+ ************************************************************************ + * \brief + * free 3D memory array + * which was alocated with get_mem3Dint() + ************************************************************************ + */ +void free_mem3Ddouble(double ***array3D) +{ + if (array3D) + { + free_mem2Ddouble(*array3D); + free (array3D); + } + else + { + error ("free_mem3D: trying to free unused memory",100); + } +} + + +#endif diff --git a/Src/h264dec/lcommon/src/memcache.c b/Src/h264dec/lcommon/src/memcache.c new file mode 100644 index 00000000..ce3b29d1 --- /dev/null +++ b/Src/h264dec/lcommon/src/memcache.c @@ -0,0 +1,106 @@ +#include "memcache.h" +#include "mbuffer.h" +#include "memalloc.h" + +void image_cache_flush(ImageCache *cache) +{ + while (cache->head) + { + VideoImage *next = cache->head->next; + free_memImage(cache->head); + cache->head = next; + } + cache->size_x = 0; + cache->size_y = 0; +} + +void image_cache_set_dimensions(ImageCache *cache, int width, int height) +{ + if (width != cache->size_x || height != cache->size_y) + { + image_cache_flush(cache); + cache->size_x = width; + cache->size_y = height; + } +} + +int image_cache_dimensions_match(ImageCache *cache, int width, int height) +{ + if (width != cache->size_x || height != cache->size_y) + return 0; + + return 1; +} + +void image_cache_add(ImageCache *cache, VideoImage *image) +{ + image->next = cache->head; + cache->head = image; +} + +struct video_image *image_cache_get(ImageCache *cache) +{ + if (cache->head) + { + VideoImage *ret = cache->head; + cache->head = ret->next; + ret->next = 0; + return ret; + } + return 0; +} + +/* ------------- + +PicMotion arrays are allowed with one extra slot in the first dimension +which we use as the next pointer +------------- */ + + +void motion_cache_flush(MotionCache *cache) +{ + while (cache->head) + { + PicMotion **next = (PicMotion **)cache->head[cache->size_y]; + free_mem2DPicMotion(cache->head); + cache->head = next; + } + cache->size_x 
= 0; + cache->size_y = 0; +} + +void motion_cache_set_dimensions(MotionCache *cache, int width, int height) +{ + if (width != cache->size_x || height != cache->size_y) + { + motion_cache_flush(cache); + cache->size_x = width; + cache->size_y = height; + } +} + +int motion_cache_dimensions_match(MotionCache *cache, int width, int height) +{ + if (width != cache->size_x || height != cache->size_y) + return 0; + + return 1; +} + +void motion_cache_add(MotionCache *cache, PicMotion **image) +{ + image[cache->size_y] = (PicMotion *)cache->head; + cache->head = image; +} + +struct pic_motion **motion_cache_get(MotionCache *cache) +{ + if (cache->head) + { + PicMotion **ret = cache->head; + cache->head = (PicMotion **)ret[cache->size_y]; + ret[cache->size_y] = 0; + return ret; + } + return 0; +} diff --git a/Src/h264dec/lcommon/src/mv_prediction.c b/Src/h264dec/lcommon/src/mv_prediction.c new file mode 100644 index 00000000..b4638d6d --- /dev/null +++ b/Src/h264dec/lcommon/src/mv_prediction.c @@ -0,0 +1,250 @@ +/*! + ************************************************************************************* + * \file mv_prediction.c + * + * \brief + * Motion Vector Prediction Functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + * - Karsten Sühring <suehring@hhi.de> + ************************************************************************************* + */ + +#include "global.h" +#include "mbuffer.h" +/*! 
+ ************************************************************************ + * \brief + * Get motion vector predictor + ************************************************************************ + */ +static void GetMotionVectorPredictorMBAFF (Macroblock *currMB, + PixelPos *block, // <--> block neighbors + short pmv[2], + short ref_frame, + PicMotion **motion, + int mb_x, + int mb_y, + int blockshape_x, + int blockshape_y) +{ + int mv_a, mv_b, mv_c, pred_vec=0; + int mvPredType, rFrameL, rFrameU, rFrameUR; + int hv; + VideoParameters *p_Vid = currMB->p_Vid; + + mvPredType = MVPRED_MEDIAN; + + + if (currMB->mb_field) + { + rFrameL = block[0].available + ? (p_Vid->mb_data[block[0].mb_addr].mb_field + ? motion[block[0].pos_y][block[0].pos_x].ref_idx + : motion[block[0].pos_y][block[0].pos_x].ref_idx * 2) : -1; + rFrameU = block[1].available + ? (p_Vid->mb_data[block[1].mb_addr].mb_field + ? motion[block[1].pos_y][block[1].pos_x].ref_idx + : motion[block[1].pos_y][block[1].pos_x].ref_idx * 2) : -1; + rFrameUR = block[2].available + ? (p_Vid->mb_data[block[2].mb_addr].mb_field + ? motion[block[2].pos_y][block[2].pos_x].ref_idx + : motion[block[2].pos_y][block[2].pos_x].ref_idx * 2) : -1; + } + else + { + rFrameL = block[0].available + ? (p_Vid->mb_data[block[0].mb_addr].mb_field + ? motion[block[0].pos_y][block[0].pos_x].ref_idx >>1 + : motion[block[0].pos_y][block[0].pos_x].ref_idx) : -1; + rFrameU = block[1].available + ? (p_Vid->mb_data[block[1].mb_addr].mb_field + ? motion[block[1].pos_y][block[1].pos_x].ref_idx >>1 + : motion[block[1].pos_y][block[1].pos_x].ref_idx) : -1; + rFrameUR = block[2].available + ? (p_Vid->mb_data[block[2].mb_addr].mb_field + ? 
motion[block[2].pos_y][block[2].pos_x].ref_idx >>1 + : motion[block[2].pos_y][block[2].pos_x].ref_idx) : -1; + } + + + /* Prediction if only one of the neighbors uses the reference frame + * we are checking + */ + if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame) + mvPredType = MVPRED_L; + else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame) + mvPredType = MVPRED_U; + else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame) + mvPredType = MVPRED_UR; + // Directional predictions + if(blockshape_x == 8 && blockshape_y == 16) + { + if(mb_x == 0) + { + if(rFrameL == ref_frame) + mvPredType = MVPRED_L; + } + else + { + if( rFrameUR == ref_frame) + mvPredType = MVPRED_UR; + } + } + else if(blockshape_x == 16 && blockshape_y == 8) + { + if(mb_y == 0) + { + if(rFrameU == ref_frame) + mvPredType = MVPRED_U; + } + else + { + if(rFrameL == ref_frame) + mvPredType = MVPRED_L; + } + } + + for (hv=0; hv < 2; hv++) + { + if (hv == 0) + { + mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[hv] : 0; + mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[hv] : 0; + mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[hv] : 0; + } + else + { + if (currMB->mb_field) + { + mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field + ? motion[block[0].pos_y][block[0].pos_x].mv[hv] + : motion[block[0].pos_y][block[0].pos_x].mv[hv] / 2 + : 0; + mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field + ? motion[block[1].pos_y][block[1].pos_x].mv[hv] + : motion[block[1].pos_y][block[1].pos_x].mv[hv] / 2 + : 0; + mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field + ? motion[block[2].pos_y][block[2].pos_x].mv[hv] + : motion[block[2].pos_y][block[2].pos_x].mv[hv] / 2 + : 0; + } + else + { + mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field + ? 
motion[block[0].pos_y][block[0].pos_x].mv[hv] * 2 + : motion[block[0].pos_y][block[0].pos_x].mv[hv] + : 0; + mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field + ? motion[block[1].pos_y][block[1].pos_x].mv[hv] * 2 + : motion[block[1].pos_y][block[1].pos_x].mv[hv] + : 0; + mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field + ? motion[block[2].pos_y][block[2].pos_x].mv[hv] * 2 + : motion[block[2].pos_y][block[2].pos_x].mv[hv] + : 0; + } + } + + switch (mvPredType) + { + case MVPRED_MEDIAN: + if(!(block[1].available || block[2].available)) + { + pred_vec = mv_a; + } + else + { + pred_vec = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c)); + } + break; + case MVPRED_L: + pred_vec = mv_a; + break; + case MVPRED_U: + pred_vec = mv_b; + break; + case MVPRED_UR: + pred_vec = mv_c; + break; + default: + break; + } + + pmv[hv] = (short) pred_vec; + } +} + +/*! + ************************************************************************ + * \brief + * Get motion vector predictor + ************************************************************************ + */ +// TODO: benski> make SSE3/MMX version +static void GetMotionVectorPredictorNormal (Macroblock *currMB, + PixelPos *block, // <--> block neighbors + short pmv[2], + short ref_frame, + PicMotion **motion, + int mb_x, + int mb_y, + int blockshape_x, + int blockshape_y) +{ + int rFrameL = block[0].available ? motion[block[0].pos_y][block[0].pos_x].ref_idx : -1; + int rFrameU = block[1].available ? motion[block[1].pos_y][block[1].pos_x].ref_idx : -1; + int rFrameUR = block[2].available ? 
motion[block[2].pos_y][block[2].pos_x].ref_idx : -1; + + /* Prediction if only one of the neighbors uses the reference frame + * we are checking + */ + if (rFrameL == ref_frame && + ((rFrameU != ref_frame && rFrameUR != ref_frame) || (blockshape_x == 8 && blockshape_y == 16 && mb_x == 0) || (blockshape_x == 16 && blockshape_y == 8 && mb_y != 0))) + { // left + pmv[0] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0; + pmv[1] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0; + } + else if (rFrameU == ref_frame && + ((rFrameL != ref_frame && rFrameUR != ref_frame) || (blockshape_x == 16 && blockshape_y == 8 && mb_y == 0))) + { // up + pmv[0] = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[0] : 0; + pmv[1] = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[1] : 0; + } + else if (rFrameUR == ref_frame && + ((rFrameL != ref_frame && rFrameU != ref_frame) || (blockshape_x == 8 && blockshape_y == 16 && mb_x != 0))) + { // upper right + pmv[0] = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[0] : 0; + pmv[1] = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[1] : 0; + } + else + { // median + if(!(block[1].available || block[2].available)) + { + pmv[0] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0; + pmv[1] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0; + } + else + { + int mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0; + int mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[0] : 0; + int mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[0] : 0; + pmv[0] = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c)); + mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0; + mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[1] : 0; + mv_c = block[2].available ? 
motion[block[2].pos_y][block[2].pos_x].mv[1] : 0; + pmv[1] = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c)); + } + } +} + +void init_motion_vector_prediction(Macroblock *currMB, int mb_aff_frame_flag) +{ + if (mb_aff_frame_flag) + currMB->GetMVPredictor = GetMotionVectorPredictorMBAFF; + else + currMB->GetMVPredictor = GetMotionVectorPredictorNormal; +} diff --git a/Src/h264dec/lcommon/src/parsetcommon.c b/Src/h264dec/lcommon/src/parsetcommon.c new file mode 100644 index 00000000..fe3f0e9a --- /dev/null +++ b/Src/h264dec/lcommon/src/parsetcommon.c @@ -0,0 +1,244 @@ + +/*! + ************************************************************************************** + * \file + * parsetcommon.c + * \brief + * Picture and Sequence Parameter set generation and handling + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + * + ************************************************************************************** + */ + +#include "global.h" +#include "parsetcommon.h" +#include "memalloc.h" +/*! + ************************************************************************************* + * \brief + * Allocates memory for a picture paramater set + * + * \return + * pointer to a pps + ************************************************************************************* + */ + +pic_parameter_set_rbsp_t *AllocPPS () + { + pic_parameter_set_rbsp_t *p; + + if ((p=calloc (sizeof (pic_parameter_set_rbsp_t), 1)) == NULL) + no_mem_exit ("AllocPPS: PPS"); + p->slice_group_id = NULL; + return p; + } + + +/*! 
+ ************************************************************************************* + * \brief + * Allocates memory for am sequence paramater set + * + * \return + * pointer to a sps + ************************************************************************************* + */ + +seq_parameter_set_rbsp_t *AllocSPS () + { + seq_parameter_set_rbsp_t *p; + + if ((p=calloc (sizeof (seq_parameter_set_rbsp_t), 1)) == NULL) + no_mem_exit ("AllocSPS: SPS"); + return p; + } + + +/*! + ************************************************************************************* + * \brief + * Frees a picture parameter set + * + * \param pps to be freed + * Picture parameter set to be freed + ************************************************************************************* + */ + + void FreePPS (pic_parameter_set_rbsp_t *pps) + { + assert (pps != NULL); + if (pps->slice_group_id != NULL) + free (pps->slice_group_id); + free (pps); + } + + + /*! + ************************************************************************************* + * \brief + * Frees a sps + * + * \param sps + * Sequence parameter set to be freed + ************************************************************************************* + */ + + void FreeSPS (seq_parameter_set_rbsp_t *sps) + { + assert (sps != NULL); + free (sps); + } + + +int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2) +{ + unsigned i; + int equal = 1; + + if ((!sps1->Valid) || (!sps2->Valid)) + return 0; + + equal &= (sps1->profile_idc == sps2->profile_idc); + equal &= (sps1->constrained_set0_flag == sps2->constrained_set0_flag); + equal &= (sps1->constrained_set1_flag == sps2->constrained_set1_flag); + equal &= (sps1->constrained_set2_flag == sps2->constrained_set2_flag); + equal &= (sps1->level_idc == sps2->level_idc); + equal &= (sps1->seq_parameter_set_id == sps2->seq_parameter_set_id); + equal &= (sps1->log2_max_frame_num_minus4 == sps2->log2_max_frame_num_minus4); + equal &= (sps1->pic_order_cnt_type == 
sps2->pic_order_cnt_type); + + if (!equal) return equal; + + if( sps1->pic_order_cnt_type == 0 ) + { + equal &= (sps1->log2_max_pic_order_cnt_lsb_minus4 == sps2->log2_max_pic_order_cnt_lsb_minus4); + } + + else if( sps1->pic_order_cnt_type == 1 ) + { + equal &= (sps1->delta_pic_order_always_zero_flag == sps2->delta_pic_order_always_zero_flag); + equal &= (sps1->offset_for_non_ref_pic == sps2->offset_for_non_ref_pic); + equal &= (sps1->offset_for_top_to_bottom_field == sps2->offset_for_top_to_bottom_field); + equal &= (sps1->num_ref_frames_in_pic_order_cnt_cycle == sps2->num_ref_frames_in_pic_order_cnt_cycle); + if (!equal) return equal; + + for ( i = 0 ; i< sps1->num_ref_frames_in_pic_order_cnt_cycle ;i ++) + equal &= (sps1->offset_for_ref_frame[i] == sps2->offset_for_ref_frame[i]); + } + + equal &= (sps1->num_ref_frames == sps2->num_ref_frames); + equal &= (sps1->gaps_in_frame_num_value_allowed_flag == sps2->gaps_in_frame_num_value_allowed_flag); + equal &= (sps1->pic_width_in_mbs_minus1 == sps2->pic_width_in_mbs_minus1); + equal &= (sps1->pic_height_in_map_units_minus1 == sps2->pic_height_in_map_units_minus1); + equal &= (sps1->frame_mbs_only_flag == sps2->frame_mbs_only_flag); + + if (!equal) return equal; + if( !sps1->frame_mbs_only_flag ) + equal &= (sps1->mb_adaptive_frame_field_flag == sps2->mb_adaptive_frame_field_flag); + + equal &= (sps1->direct_8x8_inference_flag == sps2->direct_8x8_inference_flag); + equal &= (sps1->frame_cropping_flag == sps2->frame_cropping_flag); + if (!equal) return equal; + if (sps1->frame_cropping_flag) + { + equal &= (sps1->frame_cropping_rect_left_offset == sps2->frame_cropping_rect_left_offset); + equal &= (sps1->frame_cropping_rect_right_offset == sps2->frame_cropping_rect_right_offset); + equal &= (sps1->frame_cropping_rect_top_offset == sps2->frame_cropping_rect_top_offset); + equal &= (sps1->frame_cropping_rect_bottom_offset == sps2->frame_cropping_rect_bottom_offset); + } + equal &= (sps1->vui_parameters_present_flag == 
sps2->vui_parameters_present_flag); + + return equal; +} + +int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2) +{ + unsigned i, j; + int equal = 1; + + if ((!pps1->Valid) || (!pps2->Valid)) + return 0; + + equal &= (pps1->pic_parameter_set_id == pps2->pic_parameter_set_id); + equal &= (pps1->seq_parameter_set_id == pps2->seq_parameter_set_id); + equal &= (pps1->entropy_coding_mode_flag == pps2->entropy_coding_mode_flag); + equal &= (pps1->bottom_field_pic_order_in_frame_present_flag == pps2->bottom_field_pic_order_in_frame_present_flag); + equal &= (pps1->num_slice_groups_minus1 == pps2->num_slice_groups_minus1); + + if (!equal) return equal; + + if (pps1->num_slice_groups_minus1>0) + { + equal &= (pps1->slice_group_map_type == pps2->slice_group_map_type); + if (!equal) return equal; + if (pps1->slice_group_map_type == 0) + { + for (i=0; i<=pps1->num_slice_groups_minus1; i++) + equal &= (pps1->run_length_minus1[i] == pps2->run_length_minus1[i]); + } + else if( pps1->slice_group_map_type == 2 ) + { + for (i=0; i<pps1->num_slice_groups_minus1; i++) + { + equal &= (pps1->top_left[i] == pps2->top_left[i]); + equal &= (pps1->bottom_right[i] == pps2->bottom_right[i]); + } + } + else if( pps1->slice_group_map_type == 3 || pps1->slice_group_map_type==4 || pps1->slice_group_map_type==5 ) + { + equal &= (pps1->slice_group_change_direction_flag == pps2->slice_group_change_direction_flag); + equal &= (pps1->slice_group_change_rate_minus1 == pps2->slice_group_change_rate_minus1); + } + else if( pps1->slice_group_map_type == 6 ) + { + equal &= (pps1->pic_size_in_map_units_minus1 == pps2->pic_size_in_map_units_minus1); + if (!equal) return equal; + for (i=0; i<=pps1->pic_size_in_map_units_minus1; i++) + equal &= (pps1->slice_group_id[i] == pps2->slice_group_id[i]); + } + } + + equal &= (pps1->num_ref_idx_l0_active_minus1 == pps2->num_ref_idx_l0_active_minus1); + equal &= (pps1->num_ref_idx_l1_active_minus1 == pps2->num_ref_idx_l1_active_minus1); + 
equal &= (pps1->weighted_pred_flag == pps2->weighted_pred_flag); + equal &= (pps1->weighted_bipred_idc == pps2->weighted_bipred_idc); + equal &= (pps1->pic_init_qp_minus26 == pps2->pic_init_qp_minus26); + equal &= (pps1->pic_init_qs_minus26 == pps2->pic_init_qs_minus26); + equal &= (pps1->chroma_qp_index_offset == pps2->chroma_qp_index_offset); + equal &= (pps1->deblocking_filter_control_present_flag == pps2->deblocking_filter_control_present_flag); + equal &= (pps1->constrained_intra_pred_flag == pps2->constrained_intra_pred_flag); + equal &= (pps1->redundant_pic_cnt_present_flag == pps2->redundant_pic_cnt_present_flag); + + if (!equal) return equal; + + //Fidelity Range Extensions Stuff + //It is initialized to zero, so should be ok to check all the time. + equal &= (pps1->transform_8x8_mode_flag == pps2->transform_8x8_mode_flag); + equal &= (pps1->pic_scaling_matrix_present_flag == pps2->pic_scaling_matrix_present_flag); + if(pps1->pic_scaling_matrix_present_flag) + { + for(i = 0; i < (6 + ((unsigned)pps1->transform_8x8_mode_flag << 1)); i++) + { + equal &= (pps1->pic_scaling_list_present_flag[i] == pps2->pic_scaling_list_present_flag[i]); + if(pps1->pic_scaling_list_present_flag[i]) + { + if(i < 6) + { + for (j = 0; j < 16; j++) + equal &= (pps1->ScalingList4x4[i][j] == pps2->ScalingList4x4[i][j]); + } + else + { + for (j = 0; j < 64; j++) + equal &= (pps1->ScalingList8x8[i-6][j] == pps2->ScalingList8x8[i-6][j]); + } + } + } + } + equal &= (pps1->second_chroma_qp_index_offset == pps2->second_chroma_qp_index_offset); + + return equal; +} diff --git a/Src/h264dec/lcommon/src/transform.c b/Src/h264dec/lcommon/src/transform.c new file mode 100644 index 00000000..617ca7c1 --- /dev/null +++ b/Src/h264dec/lcommon/src/transform.c @@ -0,0 +1,809 @@ +/*! 
+*************************************************************************** +* \file transform.c +* +* \brief +* Transform functions +* +* \author +* Main contributors (see contributors.h for copyright, address and affiliation details) +* - Alexis Michael Tourapis +* \date +* 01. July 2007 +************************************************************************** +*/ +#include "global.h" +#include "transform.h" +#include <emmintrin.h> + +void forward4x4(int **block, int **tblock, int pos_y, int pos_x) +{ + int i, ii; + int tmp[16]; + int *pTmp = tmp, *pblock; + int p0,p1,p2,p3; + int t0,t1,t2,t3; + + // Horizontal + for (i=pos_y; i < pos_y + BLOCK_SIZE; i++) + { + pblock = &block[i][pos_x]; + p0 = *(pblock++); + p1 = *(pblock++); + p2 = *(pblock++); + p3 = *(pblock ); + + t0 = p0 + p3; + t1 = p1 + p2; + t2 = p1 - p2; + t3 = p0 - p3; + + *(pTmp++) = t0 + t1; + *(pTmp++) = (t3 << 1) + t2; + *(pTmp++) = t0 - t1; + *(pTmp++) = t3 - (t2 << 1); + } + + // Vertical + for (i=0; i < BLOCK_SIZE; i++) + { + pTmp = tmp + i; + p0 = *pTmp; + p1 = *(pTmp += BLOCK_SIZE); + p2 = *(pTmp += BLOCK_SIZE); + p3 = *(pTmp += BLOCK_SIZE); + + t0 = p0 + p3; + t1 = p1 + p2; + t2 = p1 - p2; + t3 = p0 - p3; + + ii = pos_x + i; + tblock[pos_y ][ii] = t0 + t1; + tblock[pos_y + 1][ii] = t2 + (t3 << 1); + tblock[pos_y + 2][ii] = t0 - t1; + tblock[pos_y + 3][ii] = t3 - (t2 << 1); + } +} + +static void inverse4x4(const h264_short_block_t tblock, h264_short_block_t block, int pos_y, int pos_x) +{ + int i; + short tmp[16]; + short *pTmp = tmp; + int p0,p1,p2,p3; + int t0,t1,t2,t3; + + // Horizontal + for (i = 0; i < BLOCK_SIZE; i++) + { + t0 = tblock[i][0]; + t1 = tblock[i][1]; + t2 = tblock[i][2]; + t3 = tblock[i][3]; + + p0 = t0 + t2; + p1 = t0 - t2; + p2 = (t1 >> 1) - t3; + p3 = t1 + (t3 >> 1); + + *(pTmp++) = p0 + p3; + *(pTmp++) = p1 + p2; + *(pTmp++) = p1 - p2; + *(pTmp++) = p0 - p3; + } + + // Vertical + for (i = 0; i < BLOCK_SIZE; i++) + { + pTmp = tmp + i; + t0 = *pTmp; + t1 = *(pTmp += 
BLOCK_SIZE); + t2 = *(pTmp += BLOCK_SIZE); + t3 = *(pTmp += BLOCK_SIZE); + + p0 = t0 + t2; + p1 = t0 - t2; + p2 =(t1 >> 1) - t3; + p3 = t1 + (t3 >> 1); + + block[0][i] = p0 + p3; + block[1][i] = p1 + p2; + block[2][i] = p1 - p2; + block[3][i] = p0 - p3; + } +} + +#ifdef _M_IX86 +// benski> this exists just for conformance testing. not used in production code +static void inverse4x4_sse2_x86(const h264_short_macroblock_t tblock, h264_short_macroblock_t block, int pos_y, int pos_x) +{ + __asm + { + mov edx, pos_y + shl edx, 4 // 16 step stride + add edx, pos_x + shl edx, 1 // * sizeof(short) + + // eax: pointer to the start of tblock (offset by passed pos_y, pos_x) + mov eax, edx + add eax, tblock + + // esi: results + mov esi, edx + add esi, block + + // load 4x4 matrix + movq mm0, MMWORD PTR 0[eax] + movq mm1, MMWORD PTR 32[eax] + movq mm2, MMWORD PTR 64[eax] + movq mm3, MMWORD PTR 96[eax] + + // rotate 4x4 matrix + movq mm4, mm0 // p0 = mm4 (copy) + punpcklwd mm0, mm2 // r0 = mm0 + punpckhwd mm4, mm2 // r2 = mm4 + movq mm5, mm1 // p1 = mm5 (copy) + punpcklwd mm1, mm3 // r1 = mm1 + punpckhwd mm5, mm3 // r3 = mm5 + movq mm6, mm0 // r0 = mm6 (copy) + punpcklwd mm0, mm1 // t0 = mm0 + punpckhwd mm6, mm1 // t1 = mm6 + movq mm1, mm4 // r2 = mm1 (copy) + punpcklwd mm1, mm5 // t2 = mm1 + punpckhwd mm4, mm5 // t3 = mm4 + + /* register state: + mm0: t0 + mm1: t2 + mm2: + mm3: + mm4: t3 + mm5: + mm6: t1 + mm7: + */ + + /* + p0 = t0 + t2; + p1 = t0 - t2; + p2 = (t1 >> 1) - t3; + p3 = t1 + (t3 >> 1); + */ + movq mm2, mm0 // mm2 = t0 (copy) + paddw mm0, mm1 // mm0 = p0 + psubw mm2, mm1 // mm2 = p1, mm1 available + movq mm5, mm6 // mm5 = t1 (copy) + psraw mm5, 1 // mm5 = (t1 >> 1) + psubw mm5, mm4 // mm5 = p2 + psraw mm4, 1 // mm4 = (t3 >> 1) + paddw mm6, mm4 // mm6 = p3 + + /* register state: + mm0: p0 + mm1: + mm2: p1 + mm3: + mm4: + mm5: p2 + mm6: p3 + mm7: + */ + + /* + *(pTmp++) = p0 + p3; + *(pTmp++) = p1 + p2; + *(pTmp++) = p1 - p2; + *(pTmp++) = p0 - p3; + */ + + movq 
mm3, mm0 // mm3 = p0 (copy) + paddw mm0, mm6 // mm0 = r0 + movq mm1, mm2 // mm1 = p1 (copy) + paddw mm1, mm5 // mm1 = r1 + psubw mm2, mm5 // mm2 = r2, mm5 available + psubw mm3, mm6 // mm3 = r3 + + /* register state: + mm0: r0 + mm1: r1 + mm2: r2 + mm3: r3 + mm4: + mm5: + mm6: + mm7: + */ + + // rotate 4x4 matrix to set up for vertical + movq mm4, mm0 // r0 = mm4 (copy) + punpcklwd mm0, mm2 // p0 = mm0 + punpckhwd mm4, mm2 // p2 = mm4 + movq mm5, mm1 // r1 = mm5 (copy) + punpcklwd mm1, mm3 // p1 = mm1 + punpckhwd mm5, mm3 // p3 = mm5 + movq mm6, mm0 // p0 = mm6 (copy) + punpcklwd mm0, mm1 // t0 = mm0 + punpckhwd mm6, mm1 // t1 = mm6 + movq mm1, mm4 // p2 = mm1 (copy) + punpcklwd mm1, mm5 // t2 = mm1 + punpckhwd mm4, mm5 // t3 = mm4 + + /* register state: + mm0: t0 + mm1: t2 + mm2: + mm3: + mm4: t3 + mm5: + mm6: t1 + mm7: + */ + /* + p0 = t0 + t2; + p1 = t0 - t2; + p2 = (t1 >> 1) - t3; + p3 = t1 + (t3 >> 1); + */ + movq mm2, mm0 // mm2 = t0 (copy) + paddw mm0, mm1 // mm0 = p0 + psubw mm2, mm1 // mm2 = p1, mm1 available + movq mm5, mm6 // mm5 = t1 (copy) + psraw mm5, 1 // mm5 = (t1 >> 1) + psubw mm5, mm4 // mm5 = p2 + psraw mm4, 1 // mm4 = (t3 >> 1) + paddw mm6, mm4 // mm6 = p3 + + /* register state: + mm0: p0 + mm1: + mm2: p1 + mm3: + mm4: + mm5: p2 + mm6: p3 + mm7: + */ + + /* + *(pTmp++) = p0 + p3; + *(pTmp++) = p1 + p2; + *(pTmp++) = p1 - p2; + *(pTmp++) = p0 - p3; + */ + + movq mm3, mm0 // mm3 = p0 (copy) + paddw mm0, mm6 // mm0 = r0 + movq mm1, mm2 // mm1 = p1 (copy) + paddw mm1, mm5 // mm1 = r1 + psubw mm2, mm5 // mm2 = r2, mm5 available + psubw mm3, mm6 // mm3 = r3 + + /* register state: + mm0: r0 + mm1: r1 + mm2: r2 + mm3: r3 + mm4: + mm5: + mm6: + mm7: + */ + movq XMMWORD PTR 0[esi], mm0 + movq XMMWORD PTR 32[esi], mm1 + movq XMMWORD PTR 64[esi], mm2 + movq XMMWORD PTR 96[esi], mm3 + } +} +#endif + +static void sample_reconstruct(h264_imgpel_macroblock_t curImg, const h264_imgpel_macroblock_t mpr, const h264_short_block_t tblock, int joff, int mb_x, int 
max_imgpel_value) +{ + #ifdef _M_IX86 + __asm + { + // mm0 : constant value 32 + mov edx, 0x00200020 + movd mm0, edx + punpckldq mm0, mm0 + + // ecx: y offset + mov ecx, joff + shl ecx, 4 // imgpel stuff is going to be 16 byte stride + add ecx, mb_x + + // eax: curImg + mov eax, curImg + add eax, ecx + + // edx: mpr + mov edx, mpr + add edx, ecx + + // ecx: tblock (which is short, not byte) + mov ecx, tblock + + // mm7: zero + pxor mm7, mm7 + + // load coefficients + movq mm1, MMWORD PTR 0[ecx] + movq mm2, MMWORD PTR 8[ecx] + movq mm3, MMWORD PTR 16[ecx] + movq mm4, MMWORD PTR 24[ecx] + paddw mm1, mm0 // rres + 32 + paddw mm2, mm0 // rres + 32 + paddw mm3, mm0 // rres + 32 + paddw mm0, mm4 // rres + 32 + psraw mm1, 6 // (rres + 32) >> 6 + psraw mm2, 6 // (rres + 32) >> 6 + psraw mm3, 6 // (rres + 32) >> 6 + psraw mm0, 6 // (rres + 32) >> 6 + // mm1-mm3: tblock[0] - tblock[2], mm0: tblock[3] + + // convert mpr from unsigned char to short + movd mm4, DWORD PTR 0[edx] + movd mm5, DWORD PTR 16[edx] + movd mm6, DWORD PTR 32[edx] + punpcklbw mm4, mm7 + punpcklbw mm5, mm7 + punpcklbw mm6, mm7 + paddsw mm4, mm1 // pred_row + rres_row + movd mm1, DWORD PTR 48[edx] // reuse mm1 for mpr[3] + paddsw mm5, mm2 // pred_row + rres_row + punpcklbw mm1, mm7 + paddsw mm6, mm3 // pred_row + rres_row + paddsw mm1, mm0 // pred_row + rres_row + // results in mm4, mm5, mm6, mm1 + + // move back to 8 bit + packuswb mm4, mm7 + packuswb mm5, mm7 + packuswb mm6, mm7 + packuswb mm1, mm7 + movd DWORD PTR 0[eax], mm4 + movd DWORD PTR 16[eax], mm5 + movd DWORD PTR 32[eax], mm6 + movd DWORD PTR 48[eax], mm1 + } +#else + int i, j; + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i=0;i<BLOCK_SIZE;i++) + curImg[j+joff][mb_x+i] = (imgpel) iClip1( max_imgpel_value, rshift_rnd_sf(tblock[j][i], DQ_BITS) + mpr[j+joff][mb_x+i]); + } +#endif +} + +#if defined(_M_IX86) && defined(_DEBUG) +void itrans4x4_sse2(const h264_short_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t 
mb_rec, int pos_x, int pos_y) +{ + __declspec(align(32)) static const short const32[4] = {32, 32, 32, 32}; + __asm + { + mov edx, pos_y + shl edx, 4 // imgpel stuff is going to be 16 byte stride + add edx, pos_x + + // eax: tblock + lea eax, [edx*2] + add eax, tblock + + // ecx: mpr + mov ecx, mb_pred + add ecx, edx + + // edx: results + add edx, mb_rec + + // load 4x4 matrix + movq mm0, MMWORD PTR 0[eax] + movq mm1, MMWORD PTR 32[eax] + movq mm2, MMWORD PTR 64[eax] + movq mm3, MMWORD PTR 96[eax] + + // rotate 4x4 matrix + movq mm4, mm0 // p0 = mm4 (copy) + punpcklwd mm0, mm2 // r0 = mm0 + punpckhwd mm4, mm2 // r2 = mm4 + movq mm5, mm1 // p1 = mm5 (copy) + punpcklwd mm1, mm3 // r1 = mm1 + punpckhwd mm5, mm3 // r3 = mm5 + movq mm6, mm0 // r0 = mm6 (copy) + punpcklwd mm0, mm1 // t0 = mm0 + punpckhwd mm6, mm1 // t1 = mm6 + movq mm1, mm4 // r2 = mm1 (copy) + punpcklwd mm1, mm5 // t2 = mm1 + punpckhwd mm4, mm5 // t3 = mm4 + + /* register state: + mm0: t0 + mm1: t2 + mm2: + mm3: + mm4: t3 + mm5: + mm6: t1 + mm7: + */ + + /* + p0 = t0 + t2; + p1 = t0 - t2; + p2 = (t1 >> 1) - t3; + p3 = t1 + (t3 >> 1); + */ + movq mm2, mm0 // mm2 = t0 (copy) + paddw mm0, mm1 // mm0 = p0 + psubw mm2, mm1 // mm2 = p1, mm1 available + movq mm5, mm6 // mm5 = t1 (copy) + psraw mm5, 1 // mm5 = (t1 >> 1) + psubw mm5, mm4 // mm5 = p2 + psraw mm4, 1 // mm4 = (t3 >> 1) + paddw mm6, mm4 // mm6 = p3 + + /* register state: + mm0: p0 + mm1: + mm2: p1 + mm3: + mm4: + mm5: p2 + mm6: p3 + mm7: + */ + + /* + *(pTmp++) = p0 + p3; + *(pTmp++) = p1 + p2; + *(pTmp++) = p1 - p2; + *(pTmp++) = p0 - p3; + */ + + movq mm3, mm0 // mm3 = p0 (copy) + paddw mm0, mm6 // mm0 = r0 + movq mm1, mm2 // mm1 = p1 (copy) + paddw mm1, mm5 // mm1 = r1 + psubw mm2, mm5 // mm2 = r2, mm5 available + psubw mm3, mm6 // mm3 = r3 + + /* register state: + mm0: r0 + mm1: r1 + mm2: r2 + mm3: r3 + mm4: + mm5: + mm6: + mm7: + */ + + // rotate 4x4 matrix to set up for vertical + movq mm4, mm0 // r0 = mm4 (copy) + punpcklwd mm0, mm2 // p0 = 
mm0 + punpckhwd mm4, mm2 // p2 = mm4 + movq mm5, mm1 // r1 = mm5 (copy) + punpcklwd mm1, mm3 // p1 = mm1 + punpckhwd mm5, mm3 // p3 = mm5 + movq mm6, mm0 // p0 = mm6 (copy) + punpcklwd mm0, mm1 // t0 = mm0 + punpckhwd mm6, mm1 // t1 = mm6 + movq mm1, mm4 // p2 = mm1 (copy) + punpcklwd mm1, mm5 // t2 = mm1 + punpckhwd mm4, mm5 // t3 = mm4 + + /* register state: + mm0: t0 + mm1: t2 + mm2: + mm3: + mm4: t3 + mm5: + mm6: t1 + mm7: + */ + /* + p0 = t0 + t2; + p1 = t0 - t2; + p2 = (t1 >> 1) - t3; + p3 = t1 + (t3 >> 1); + */ + movq mm2, mm0 // mm2 = t0 (copy) + paddw mm0, mm1 // mm0 = p0 + psubw mm2, mm1 // mm2 = p1, mm1 available + movq mm5, mm6 // mm5 = t1 (copy) + psraw mm5, 1 // mm5 = (t1 >> 1) + psubw mm5, mm4 // mm5 = p2 + psraw mm4, 1 // mm4 = (t3 >> 1) + paddw mm6, mm4 // mm6 = p3 + + /* register state: + mm0: p0 + mm1: + mm2: p1 + mm3: + mm4: + mm5: p2 + mm6: p3 + mm7: + */ + + /* + *(pTmp++) = p0 + p3; + *(pTmp++) = p1 + p2; + *(pTmp++) = p1 - p2; + *(pTmp++) = p0 - p3; + */ + + movq mm3, mm0 // mm3 = p0 (copy) + paddw mm0, mm6 // mm0 = r0 + movq mm1, mm2 // mm1 = p1 (copy) + paddw mm1, mm5 // mm1 = r1 + psubw mm2, mm5 // mm2 = r2, mm5 available + psubw mm3, mm6 // mm3 = r3 + + /* register state: + mm0: r0 + mm1: r1 + mm2: r2 + mm3: r3 + mm4: + mm5: + mm6: + mm7: + */ +/* --- 4x4 iDCT done, now time to combine with mpr --- */ + // mm0 : constant value 32 + movq mm7, const32 + + paddw mm0, mm7 // rres + 32 + psraw mm0, 6 // (rres + 32) >> 6 + paddw mm1, mm7 // rres + 32 + psraw mm1, 6 // (rres + 32) >> 6 + paddw mm2, mm7 // rres + 32 + psraw mm2, 6 // (rres + 32) >> 6 + paddw mm3, mm7 // rres + 32 + psraw mm3, 6 // (rres + 32) >> 6 + + pxor mm7, mm7 + + // convert mpr from unsigned char to short + movd mm4, DWORD PTR 0[ecx] + movd mm5, DWORD PTR 16[ecx] + movd mm6, DWORD PTR 32[ecx] + punpcklbw mm4, mm7 + punpcklbw mm5, mm7 + punpcklbw mm6, mm7 + paddsw mm4, mm0 // pred_row + rres_row + movd mm0, DWORD PTR 48[ecx] // reuse mm0 for mpr[3] + paddsw mm5, mm1 // 
pred_row + rres_row + punpcklbw mm0, mm7 + paddsw mm6, mm2 // pred_row + rres_row + paddsw mm0, mm3 // pred_row + rres_row + // results in mm4, mm5, mm6, mm0 + + // move back to 8 bit + packuswb mm4, mm7 + packuswb mm5, mm7 + packuswb mm6, mm7 + packuswb mm0, mm7 + movd DWORD PTR 0[edx], mm4 + movd DWORD PTR 16[edx], mm5 + movd DWORD PTR 32[edx], mm6 + movd DWORD PTR 48[edx], mm0 + } +} +#elif defined(_M_X64) +static void itrans4x4_sse2(const h264_int_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y) +{ + __declspec(align(32)) static const int const32[4] = {32, 32, 32, 32}; + __m128i p0,p1,p2,p3; + __m128i t0,t1,t2,t3; + __m128i r0,r1,r2,r3; + __m128i c32, zero; + + // horizontal + // load registers in vertical mode, we'll rotate them next + p0 = _mm_loadu_si128((__m128i *)&tblock[pos_y][pos_x]); // 00 01 02 03 + p1 = _mm_loadu_si128((__m128i *)&tblock[pos_y+1][pos_x]); // 10 11 12 13 + p2 = _mm_loadu_si128((__m128i *)&tblock[pos_y+2][pos_x]); // 20 21 22 23 + p3 = _mm_loadu_si128((__m128i *)&tblock[pos_y+3][pos_x]); // 30 31 32 33 + + // rotate 4x4 matrix + r0 = _mm_unpacklo_epi32(p0, p2); // 00 20 01 21 + r1 = _mm_unpacklo_epi32(p1, p3); // 10 30 11 31 + r2 = _mm_unpackhi_epi32(p0, p2); // 02 22 03 23 + r3 = _mm_unpackhi_epi32(p1, p3); // 12 32 13 33 + t0 = _mm_unpacklo_epi32(r0, r1); // 00 10 20 30 + t1 = _mm_unpackhi_epi32(r0, r1); // 01 11 21 31 + t2 = _mm_unpacklo_epi32(r2, r3); // 02 12 22 32 + t3 = _mm_unpackhi_epi32(r2, r3); // 03 13 23 33 + + p0 = _mm_add_epi32(t0, t2); //t0 + t2; + p1 = _mm_sub_epi32(t0, t2); // t0 - t2; + p2 = _mm_srai_epi32(t1, 1); // t1 >> 1 + p2 = _mm_sub_epi32(p2, t3); // (t1 >> 1) - t3; + p3 = _mm_srai_epi32(t3, 1); // (t3 >> 1) + p3 = _mm_add_epi32(p3, t1); // t1 + (t3 >> 1); + + t0 = _mm_add_epi32(p0, p3); //p0 + p3; + t1 = _mm_add_epi32(p1, p2);//p1 + p2; + t2 = _mm_sub_epi32(p1, p2); //p1 - p2; + t3 = _mm_sub_epi32(p0, p3); //p0 - p3; + + // rotate 4x4 matrix to 
set up for vertical + r0 = _mm_unpacklo_epi32(t0, t2); + r1 = _mm_unpacklo_epi32(t1, t3); + r2 = _mm_unpackhi_epi32(t0, t2); + r3 = _mm_unpackhi_epi32(t1, t3); + t0 = _mm_unpacklo_epi32(r0, r1); + t1 = _mm_unpackhi_epi32(r0, r1); + t2 = _mm_unpacklo_epi32(r2, r3); + t3 = _mm_unpackhi_epi32(r2, r3); + + // vertical + p0 = _mm_add_epi32(t0, t2); //t0 + t2; + p3 = _mm_srai_epi32(t3, 1); // (t3 >> 1) + p3 = _mm_add_epi32(p3, t1); // t1 + (t3 >> 1); + r0 = _mm_add_epi32(p0, p3); //p0 + p3; + r3 = _mm_sub_epi32(p0, p3); //p0 - p3; + p1 = _mm_sub_epi32(t0, t2); // t0 - t2; + p2 = _mm_srai_epi32(t1, 1); // t1 >> 1 + p2 = _mm_sub_epi32(p2, t3); // (t1 >> 1) - t3; + r1 = _mm_add_epi32(p1, p2);//p1 + p2; + r2 = _mm_sub_epi32(p1, p2); //p1 - p2; + + c32 = _mm_load_si128((const __m128i *)const32); + zero = _mm_setzero_si128(); + + // (x + 32) >> 6 + r0 = _mm_add_epi32(r0, c32); + r0 = _mm_srai_epi32(r0, 6); + r1 = _mm_add_epi32(r1, c32); + r1 = _mm_srai_epi32(r1, 6); + r2 = _mm_add_epi32(r2, c32); + r2 = _mm_srai_epi32(r2, 6); + r3 = _mm_add_epi32(r3, c32); + r3 = _mm_srai_epi32(r3, 6); + + // convert to 16bit values + r0 = _mm_packs_epi32(r0, r1); + r2 = _mm_packs_epi32(r2, r3); + + // convert mpr from unsigned char to short + p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y][pos_x]); + p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+1][pos_x]); + p0 = _mm_unpacklo_epi32(p0, p1); + p0 = _mm_unpacklo_epi8(p0, zero); // convert to short + r0 = _mm_add_epi16(r0, p0); + + p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+2][pos_x]); + p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+3][pos_x]); + p0 = _mm_unpacklo_epi32(p0, p1); + p0 = _mm_unpacklo_epi8(p0, zero); // convert to short + r2 = _mm_add_epi16(r2, p0); + + r0 = _mm_packus_epi16(r0, r2); // convert to unsigned char + *(int32_t *)&mb_rec[pos_y][pos_x] = _mm_cvtsi128_si32(r0); + r0 = _mm_srli_si128(r0, 4); + *(int32_t *)&mb_rec[pos_y+1][pos_x] = _mm_cvtsi128_si32(r0); + r0 = _mm_srli_si128(r0, 4); + *(int32_t 
*)&mb_rec[pos_y+2][pos_x] = _mm_cvtsi128_si32(r0); + r0 = _mm_srli_si128(r0, 4); + *(int32_t *)&mb_rec[pos_y+3][pos_x] = _mm_cvtsi128_si32(r0); +} +#endif + +void itrans4x4_c(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y) +{ + inverse4x4(tblock, (h264_short_block_row_t *)tblock,pos_y,pos_x); + sample_reconstruct(mb_rec, mb_pred, tblock, pos_y, pos_x, 255); +} + +void ihadamard4x4(int block[4][4]) +{ + int i; + int tmp[16]; + int *pTmp = tmp; + int p0,p1,p2,p3; + int t0,t1,t2,t3; + + // Horizontal + for (i = 0; i < BLOCK_SIZE; i++) + { + t0 = block[i][0]; + t1 = block[i][1]; + t2 = block[i][2]; + t3 = block[i][3]; + + p0 = t0 + t2; + p1 = t0 - t2; + p2 = t1 - t3; + p3 = t1 + t3; + + *(pTmp++) = p0 + p3; + *(pTmp++) = p1 + p2; + *(pTmp++) = p1 - p2; + *(pTmp++) = p0 - p3; + } + + // Vertical + for (i = 0; i < BLOCK_SIZE; i++) + { + pTmp = tmp + i; + t0 = *pTmp; + t1 = *(pTmp += BLOCK_SIZE); + t2 = *(pTmp += BLOCK_SIZE); + t3 = *(pTmp += BLOCK_SIZE); + + p0 = t0 + t2; + p1 = t0 - t2; + p2 = t1 - t3; + p3 = t1 + t3; + + block[0][i] = p0 + p3; + block[1][i] = p1 + p2; + block[2][i] = p1 - p2; + block[3][i] = p0 - p3; + } +} + +void ihadamard4x2(int **tblock, int **block) +{ + int i; + int tmp[8]; + int *pTmp = tmp; + int p0,p1,p2,p3; + int t0,t1,t2,t3; + + // Horizontal + *(pTmp++) = tblock[0][0] + tblock[1][0]; + *(pTmp++) = tblock[0][1] + tblock[1][1]; + *(pTmp++) = tblock[0][2] + tblock[1][2]; + *(pTmp++) = tblock[0][3] + tblock[1][3]; + + *(pTmp++) = tblock[0][0] - tblock[1][0]; + *(pTmp++) = tblock[0][1] - tblock[1][1]; + *(pTmp++) = tblock[0][2] - tblock[1][2]; + *(pTmp ) = tblock[0][3] - tblock[1][3]; + + // Vertical + pTmp = tmp; + for (i = 0; i < 2; i++) + { + p0 = *(pTmp++); + p1 = *(pTmp++); + p2 = *(pTmp++); + p3 = *(pTmp++); + + t0 = p0 + p2; + t1 = p0 - p2; + t2 = p1 - p3; + t3 = p1 + p3; + + // coefficients (transposed) + block[0][i] = t0 + t3; + block[1][i] = t1 + t2; + 
block[2][i] = t1 - t2; + block[3][i] = t0 - t3; + } +} + +//following functions perform 8 additions, 8 assignments. Should be a bit faster +void ihadamard2x2(int tblock[4], int block[4]) +{ + int t0,t1,t2,t3; + + t0 = tblock[0] + tblock[1]; + t1 = tblock[0] - tblock[1]; + t2 = tblock[2] + tblock[3]; + t3 = tblock[2] - tblock[3]; + + block[0] = (t0 + t2); + block[1] = (t1 + t3); + block[2] = (t0 - t2); + block[3] = (t1 - t3); +} + diff --git a/Src/h264dec/lcommon/src/win32.c b/Src/h264dec/lcommon/src/win32.c new file mode 100644 index 00000000..7d921e1e --- /dev/null +++ b/Src/h264dec/lcommon/src/win32.c @@ -0,0 +1,67 @@ + +/*! + ************************************************************************************* + * \file win32.c + * + * \brief + * Platform dependent code + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Suehring <suehring@hhi.de> + ************************************************************************************* + */ + +#include "global.h" + + +#ifdef _WIN32 + +static LARGE_INTEGER freq; + +void gettime(TIME_T* time) +{ + QueryPerformanceCounter(time); +} + +int64 timediff(TIME_T* start, TIME_T* end) +{ + return (int64)((end->QuadPart - start->QuadPart)); +} + +int64 timenorm(int64 cur_time) +{ + static int first = 1; + + if(first) + { + QueryPerformanceFrequency(&freq); + first = 0; + } + + return (int64)(cur_time * 1000 /(freq.QuadPart)); +} + +#else + +static struct timezone tz; + +void gettime(TIME_T* time) +{ + gettimeofday(time, &tz); +} + +int64 timediff(TIME_T* start, TIME_T* end) +{ + int t1, t2; + + t1 = end->tv_sec - start->tv_sec; + t2 = end->tv_usec - start->tv_usec; + return (int64) t2 + (int64) t1 * (int64) 1000000; +} + +int64 timenorm(int64 cur_time) +{ + return (int64)(cur_time / (int64) 1000); +} +#endif diff --git a/Src/h264dec/ldecod/inc/biaridecod.h b/Src/h264dec/ldecod/inc/biaridecod.h new file mode 100644 index 00000000..9364632b --- /dev/null +++ 
b/Src/h264dec/ldecod/inc/biaridecod.h @@ -0,0 +1,157 @@ + +/*! + *************************************************************************** + * \file + * biaridecod.h + * + * \brief + * Headerfile for binary arithmetic decoder routines + * + * \author + * Detlev Marpe, + * Gabi Blättermann + * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved. + * + * \date + * 21. Oct 2000 + ************************************************************************** + */ + +#ifndef _BIARIDECOD_H_ +#define _BIARIDECOD_H_ + + +/************************************************************************ + * D e f i n i t i o n s + *********************************************************************** + */ + +/* Range table for LPS */ +/* +static const byte rLPS_table_64x4[64][4]= +{ + { 128, 176, 208, 240}, + { 128, 167, 197, 227}, + { 128, 158, 187, 216}, + { 123, 150, 178, 205}, + { 116, 142, 169, 195}, + { 111, 135, 160, 185}, + { 105, 128, 152, 175}, + { 100, 122, 144, 166}, + { 95, 116, 137, 158}, + { 90, 110, 130, 150}, + { 85, 104, 123, 142}, + { 81, 99, 117, 135}, + { 77, 94, 111, 128}, + { 73, 89, 105, 122}, + { 69, 85, 100, 116}, + { 66, 80, 95, 110}, + { 62, 76, 90, 104}, + { 59, 72, 86, 99}, + { 56, 69, 81, 94}, + { 53, 65, 77, 89}, + { 51, 62, 73, 85}, + { 48, 59, 69, 80}, + { 46, 56, 66, 76}, + { 43, 53, 63, 72}, + { 41, 50, 59, 69}, + { 39, 48, 56, 65}, + { 37, 45, 54, 62}, + { 35, 43, 51, 59}, + { 33, 41, 48, 56}, + { 32, 39, 46, 53}, + { 30, 37, 43, 50}, + { 29, 35, 41, 48}, + { 27, 33, 39, 45}, + { 26, 31, 37, 43}, + { 24, 30, 35, 41}, + { 23, 28, 33, 39}, + { 22, 27, 32, 37}, + { 21, 26, 30, 35}, + { 20, 24, 29, 33}, + { 19, 23, 27, 31}, + { 18, 22, 26, 30}, + { 17, 21, 25, 28}, + { 16, 20, 23, 27}, + { 15, 19, 22, 25}, + { 14, 18, 21, 24}, + { 14, 17, 20, 23}, + { 13, 16, 19, 22}, + { 12, 15, 18, 21}, + { 12, 14, 17, 20}, + { 11, 14, 16, 19}, + { 11, 13, 15, 18}, + { 10, 12, 15, 17}, + { 10, 12, 14, 16}, + { 9, 11, 13, 15}, + { 9, 11, 12, 14}, + { 8, 
10, 12, 14}, + { 8, 9, 11, 13}, + { 7, 9, 11, 12}, + { 7, 9, 10, 12}, + { 7, 8, 10, 11}, + { 6, 8, 9, 11}, + { 6, 7, 9, 10}, + { 6, 7, 8, 9}, + { 2, 2, 2, 2} +};*/ +static const byte rLPS_table_64x4[4][64]={ +{128, 128, 128, 123, 116, 111, 105, 100, 95, 90, 85, 81, 77, 73, 69, 66, + 62, 59, 56, 53, 51, 48, 46, 43, 41, 39, 37, 35, 33, 32, 30, 29, + 27, 26, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 12, + 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 7, 6, 6, 6, 2, + }, +{176, 167, 158, 150, 142, 135, 128, 122, 116, 110, 104, 99, 94, 89, 85, 80, + 76, 72, 69, 65, 62, 59, 56, 53, 50, 48, 45, 43, 41, 39, 37, 35, + 33, 31, 30, 28, 27, 26, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, + 14, 14, 13, 12, 12, 11, 11, 10, 9, 9, 9, 8, 8, 7, 7, 2, + }, +{208, 197, 187, 178, 169, 160, 152, 144, 137, 130, 123, 117, 111, 105, 100, 95, + 90, 86, 81, 77, 73, 69, 66, 63, 59, 56, 54, 51, 48, 46, 43, 41, + 39, 37, 35, 33, 32, 30, 29, 27, 26, 25, 23, 22, 21, 20, 19, 18, + 17, 16, 15, 15, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 2, + }, +{240, 227, 216, 205, 195, 185, 175, 166, 158, 150, 142, 135, 128, 122, 116, 110, + 104, 99, 94, 89, 85, 80, 76, 72, 69, 65, 62, 59, 56, 53, 50, 48, + 45, 43, 41, 39, 37, 35, 33, 31, 30, 28, 27, 25, 24, 23, 22, 21, + 20, 19, 18, 17, 16, 15, 14, 14, 13, 12, 12, 11, 11, 10, 9, 2, +}}; + +// make uint16 to match biari_decode_symbol +static const byte AC_next_state_MPS_64[64] = +{ + 1,2,3,4,5,6,7,8,9,10, + 11,12,13,14,15,16,17,18,19,20, + 21,22,23,24,25,26,27,28,29,30, + 31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50, + 51,52,53,54,55,56,57,58,59,60, + 61,62,62,63 +}; + +// make uint16 to match biari_decode_symbol +static const byte AC_next_state_LPS_64[64] = +{ + 0, 0, 1, 2, 2, 4, 4, 5, 6, 7, + 8, 9, 9,11,11,12,13,13,15,15, + 16,16,18,18,19,19,21,21,22,22, + 23,24,24,25,26,26,27,27,28,29, + 29,30,30,30,31,32,32,33,33,33, + 34,34,35,35,35,36,36,36,37,37, + 37,38,38,63 +}; + +static const byte 
renorm_table_32[32]={6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; +static const byte renorm_table_256[256]={6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; + +extern void arideco_start_decoding(DecodingEnvironmentPtr eep, unsigned char *code_buffer, int firstbyte, int *code_len); +//extern int arideco_bits_read(const DecodingEnvironmentPtr dep); +extern void arideco_done_decoding(DecodingEnvironmentPtr dep); +extern void biari_init_context (int qp, BiContextTypePtr ctx, const char* ini); +extern unsigned int biari_decode_symbol(DecodingEnvironmentPtr dep, BiContextTypePtr bi_ct ); +extern unsigned int biari_decode_symbol_eq_prob(DecodingEnvironmentPtr dep); +extern unsigned int biari_decode_final(DecodingEnvironmentPtr dep); +extern unsigned int getbyte(DecodingEnvironmentPtr dep); +extern unsigned int getword(DecodingEnvironmentPtr dep); +#endif // BIARIDECOD_H_ + diff --git a/Src/h264dec/ldecod/inc/block.h b/Src/h264dec/ldecod/inc/block.h new file mode 100644 index 00000000..d819b13c --- /dev/null +++ b/Src/h264dec/ldecod/inc/block.h @@ -0,0 +1,133 @@ + +/*! 
+ ************************************************************************ + * \file block.h + * + * \brief + * definitions for block decoding functions + * + * \author + * Inge Lille-Langoy <inge.lille-langoy@telenor.com> \n + * Telenor Satellite Services \n + * P.O.Box 6914 St.Olavs plass \n + * N-0130 Oslo, Norway + * + ************************************************************************ + */ + +#ifndef _BLOCK_H_ +#define _BLOCK_H_ + +#include "global.h" +#include "transform8x8.h" + +static const byte QP_SCALE_CR[52]= +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, + 12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27, + 28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37, + 37,38,38,38,39,39,39,39 + +}; + +//! look up tables for FRExt_chroma support +static const unsigned char subblk_offset_x[3][8][4] = +{ + { + {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + }, + { + {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + }, + { + {0, 4, 0, 4}, + {8,12, 8,12}, + {0, 4, 0, 4}, + {8,12, 8,12}, + {0, 4, 0, 4}, + {8,12, 8,12}, + {0, 4, 0, 4}, + {8,12, 8,12} + } +}; + + +static const unsigned char subblk_offset_y[3][8][4] = +{ + { + {0, 0, 4, 4}, + {0, 0, 4, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} + }, + { + {0, 0, 4, 4}, + {8, 8,12,12}, + {0, 0, 4, 4}, + {8, 8,12,12}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} + }, + { + {0, 0, 4, 4}, + {0, 0, 4, 4}, + {8, 8,12,12}, + {8, 8,12,12}, + {0, 0, 4, 4}, + {0, 0, 4, 4}, + {8, 8,12,12}, + {8, 8,12,12} + } +}; + +static const byte decode_block_scan[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; +static const int cof4_pos_to_subblock[4][4] = { { 0, 1, 4, 5}, {2,3,6,7}, {8,9,12,13}, {10,11,14,15} }; + +extern void iMBtrans4x4(Macroblock *currMB, ColorPlane pl, int smb); +extern void 
iMBtrans8x8(Macroblock *currMB, ColorPlane pl); + +extern void itrans_sp_cr(Macroblock *currMB, int uv); + +extern void intrapred_chroma (Macroblock *currMB, int uv); + +extern void Inv_Residual_trans_4x4(Macroblock *currMB, ColorPlane pl, int ioff, int joff); +extern void Inv_Residual_trans_8x8(Macroblock *currMB, ColorPlane pl, int ioff,int joff); + +extern void itrans4x4_c(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y); +extern void itrans4x4_mmx(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y); +extern int intrapred (Macroblock *currMB, ColorPlane pl, int ioff,int joff,int i4,int j4); +extern void itrans_2 (Macroblock *currMB, ColorPlane pl); +extern void iTransform (Macroblock *currMB, ColorPlane pl, int smb); + +extern void copy_image_data (imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x, int width, int height); +extern void copy_image_data_16x16 (imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x); + +extern void copy_image_data_16x16_stride_c(struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source); +extern void copy_image_data_16x16_stride_sse(struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source); +extern void copy_image_data_8x8_stride (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source); +extern void copy_image_data_8x8_stride2 (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int src_x, int src_y); +extern void copy_image_data_4x4_stride (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int src_x, int src_y); +extern void copy_image_data_stride (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int width, int height); + 
+#endif + diff --git a/Src/h264dec/ldecod/inc/cabac.h b/Src/h264dec/ldecod/inc/cabac.h new file mode 100644 index 00000000..5458d2be --- /dev/null +++ b/Src/h264dec/ldecod/inc/cabac.h @@ -0,0 +1,64 @@ + +/*! + *************************************************************************** + * \file + * cabac.h + * + * \brief + * Header file for entropy coding routines + * + * \author + * Detlev Marpe \n + * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved. + * + * \date + * 21. Oct 2000 (Changes by Tobias Oelbaum 28.08.2001) + *************************************************************************** + */ + +#ifndef _CABAC_H_ +#define _CABAC_H_ + +#include "global.h" + +/* (level, run) pair; this is the by-value return type of readRunLevel_CABAC below. */ +typedef struct Run_Level +{ + int level; + int run; +} RunLevel; +extern MotionInfoContexts* create_contexts_MotionInfo(void); +extern TextureInfoContexts* create_contexts_TextureInfo(void); +extern void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx); +extern void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx); + +extern void cabac_new_slice(Slice *currSlice); + +extern int readMB_typeInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp); +extern int readB8_typeInfo_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp); +extern int readIntraPredMode_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp); +extern char readRefFrame_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int x, int y); +extern char readRefFrame_CABAC0(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int y); +extern int readMVD_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int k, int list, int x, int y); +extern int readCBP_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp); + +// readRunLevel_CABAC returns the level and the run together, by value, in a RunLevel struct (there is no separate run output parameter) +extern RunLevel readRunLevel_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int context); +extern short readDquant_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp); +extern char 
readCIPredMode_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp); +extern int readMB_skip_flagInfo_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp); +extern Boolean readFieldModeInfo_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp); +extern Boolean readMB_transform_size_flag_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp); + +extern void readIPCM_CABAC(Slice *currSlice, struct datapartition *dP); + +extern int cabac_startcode_follows(Slice *currSlice, int eos_bit); + + +extern int check_next_mb_and_get_field_mode_CABAC(Slice *currSlice, DataPartition *act_dp); + +extern void CheckAvailabilityOfNeighborsCABAC(Macroblock *currMB); + +extern void set_read_and_store_CBP(Macroblock **currMB, int chroma_format_idc); + +#endif // _CABAC_H_ + diff --git a/Src/h264dec/ldecod/inc/context_ini.h b/Src/h264dec/ldecod/inc/context_ini.h new file mode 100644 index 00000000..73977be6 --- /dev/null +++ b/Src/h264dec/ldecod/inc/context_ini.h @@ -0,0 +1,23 @@ + +/*! + ************************************************************************************* + * \file context_ini.h + * + * \brief + * CABAC context initializations + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe <marpe@hhi.de> + * - Heiko Schwarz <hschwarz@hhi.de> + ************************************************************************************** + */ + + +#ifndef _CONTEXT_INI_ +#define _CONTEXT_INI_ + +extern void init_contexts (Slice *currslice); + +#endif + diff --git a/Src/h264dec/ldecod/inc/contributors.h b/Src/h264dec/ldecod/inc/contributors.h new file mode 100644 index 00000000..3e462d0b --- /dev/null +++ b/Src/h264dec/ldecod/inc/contributors.h @@ -0,0 +1,223 @@ + +/*! \file + * contributors.h + * \brief + * List of contributors and copyright information. 
+ * + * \par Copyright statements + \verbatim + H.264 JM coder/decoder + + Copyright (C) 2000 by + Telenor Satellite Services, Norway + Ericsson Radio Systems, Sweden + TELES AG, Germany + Nokia Inc., USA + Nokia Corporation, Finland + Siemens AG, Germany + Fraunhofer-Institute for Telecommunications Heinrich-Hertz-Institut (HHI), Germany + University of Hannover, Institut of Communication Theory and Signal Processing, Germany + TICSP, Tampere University of Technology, Finland + Munich University of Technology, Institute for Communications Engineering, Germany + Videolocus, Canada + Motorola Inc., USA + Microsoft Corp., USA + Apple Computer, Inc. + RealNetworks, Inc., USA + Thomson, Inc., USA + Sejong Univ., Digital Media System Lab., Korea + \endverbatim + \par Full Contact Information + \verbatim + + Lowell Winger <lwinger@videolocus.com><lwinger@uwaterloo.ca> + Guy Côté <gcote@videolocus.com> + Michael Gallant <mgallant@videolocus.com> + VideoLocus Inc. + 97 Randall Dr. + Waterloo, ON, Canada N2V1C5 + + Inge Lille-Langøy <inge.lille-langoy@telenor.com> + Telenor Satellite Services + P.O.Box 6914 St.Olavs plass + N-0130 Oslo, Norway + + Rickard Sjoberg <rickard.sjoberg@era.ericsson.se> + Ericsson Radio Systems + KI/ERA/T/VV + 164 80 Stockholm, Sweden + + Stephan Wenger <stewe@cs.tu-berlin.de> + TU Berlin / TELES AG + Sekr. FR 6-3 + Franklinstr. 28-29 + D-10587 Berlin, Germany + + Jani Lainema <jani.lainema@nokia.com> + Nokia Inc. / Nokia Research Center + 6000 Connection Drive + Irving, TX 75039, USA + + Sebastian Purreiter <sebastian.purreiter@mch.siemens.de> + Siemens AG + ICM MD MP RD MCH 83 + P.O.Box 80 17 07 + D-81617 Munich, Germany + + Thomas Wedi <wedi@tnt.uni-hannover.de> + University of Hannover + Institut of Communication Theory and Signal Processing + Appelstr. 
9a + 30167 Hannover, Germany + + Guido Heising + Fraunhofer-Institute for Telecommunications + Heinrich-Hertz-Institut (HHI) + Einsteinufer 37 + 10587 Berlin + Germany + + Gabi Blaettermann + Fraunhofer-Institute for Telecommunications + Heinrich-Hertz-Institut (HHI) + Einsteinufer 37 + 10587 Berlin + Germany + + Detlev Marpe <marpe@hhi.de> + Fraunhofer-Institute for Telecommunications + Heinrich-Hertz-Institut (HHI) + Einsteinufer 37 + 10587 Berlin + Germany + + Ragip Kurceren <ragip.kurceren@nokia.com> + Nokia Inc. / Nokia Research Center + 6000 Connection Drive + Irving, TX 75039, USA + + Viktor Varsa <viktor.varsa@nokia.com> + Nokia Inc. / Nokia Research Center + 6000 Connection Drive + Irving, TX 75039, USA + + Ye-Kui Wang <wyk@ieee.org> + Tampere University of Technology + Tampere International Center for Signal Processing + 33720 Tampere, Finland + + Ari Hourunranta <ari.hourunranta@nokia.com> + Nokia Corporation / Nokia Mobile Phones + P.O. Box 88 + 33721 Tampere, Finland + + Yann Le Maguet <yann.lemaguet@philips.com> + Philips Research France + + Dong Tian <tian@cs.tut.fi> + Tampere University of Technology + Tampere International Center for Signal Processing + 33720 Tampere, Finland + + Miska M. Hannuksela <miska.hannuksela@nokia.com> + Nokia Corporation / Nokia Mobile Phones + P.O. Box 88 + 33721 Tampere, Finland + + Karsten Suehring <suehring@hhi.de> + Fraunhofer-Institute for Telecommunications + Heinrich-Hertz-Institut (HHI) + Einsteinufer 37 + 10587 Berlin + Germany + + Heiko Schwarz <hschwarz@hhi.de> + Fraunhofer-Institute for Telecommunications + Heinrich-Hertz-Institut (HHI) + Einsteinufer 37 + 10587 Berlin + Germany + + Tobias Oelbaum <drehvial@gmx.net> + Institute for Communications Engineering + Munich University of Technology + Germany + + Limin Wang <liwang@gi.com> + Krit Panusopone <kpanusopone@gi.com> + Rajeev Gandhi <rgandhi@gi.com> + Yue Yu <yyu@gi.com> + Motorola Inc. 
+ 6450 Sequence Drive + San Diego, CA 92121 USA + + Feng Wu <fengwu@microsoft.com> + Xiaoyan Sun <sunxiaoyan@msrchina.research.microsoft.com> + Microsoft Research Asia + 3/F, Beijing Sigma Center + No.49, Zhichun Road, Hai Dian District, + Beijing China 100080 + + Yoshihiro Kikuchi <yoshihiro.kikuchi@toshiba.co.jp> + Takeshi Chujoh <takeshi.chujoh@toshiba.co.jp> + Toshiba Corporation + Research and Development Center + Kawasaki 212-8582, Japan + + Shinya Kadono <kadono@drl.mei.co.jp> + Matsushita Electric Industrial Co., Ltd. + 1006 Kadoma, Kadoma + Osaka 663-8113, Japan + + Dzung Hoang <dthoang@yahoo.com> + 10533 Roy Butler Dr. + Austin, TX 78717 + + Eric Viscito <eric@ev-consulting.com> + eV Consulting + 52 Tracy Ln + Shelburne, VT 05482 USA + + Barry Haskell + Apple Computer, Inc. <bhaskell@apple.com> + 2 Infinite Loop + Cupertino, California 95014 + + Greg Conklin + RealNetworks, Inc. <gregc@real.com> + 2601 Elliott Ave + Seattle, WA 98101 + + Jill Boyce <jill.boyce@thomson.net> + Cristina Gomila <cristina.gomila@thomson.net> + Thomson + 2 Independence Way + Princeton, NJ 08540 + + Alexis Michael Tourapis <alexismt@ieee.org><atour@dolby.com> + Athanasios Leontaris <aleon@dolby.com> + Dolby Laboratories Inc. + 3601 West Alameda Ave. + Burbank, CA 91505 + + Saurav K Bandyopadhyay <saurav@ieee.org> + Purvin Pandit <Purvin.Pandit@thomson.net> + Zhenyu Wu <Zhenyu.Wu@thomson.net> + Thomson Inc. 
+ 2 Independence Way + Princeton, NJ 08540 + + Shun-ichi Sekiguchi <Sekiguchi.Shunichi@eb.MitsubishiElectric.co.jp> + Information Technology R&D Center, + Mitsubishi Electric Corporation + 5-1-1, Ofuna, Kamakura, Japan + + Yung-Lyul Lee <yllee@sejong.ac.kr> + Ki-Hun Han <khhan@dms.sejong.ac.kr> + Department of Computer Engineering, + Sejong University + 98 Kunja-Dong, Kwangjin-Gu, Seoul 143-747, Korea + + \endverbatim +*/ + diff --git a/Src/h264dec/ldecod/inc/defines.h b/Src/h264dec/ldecod/inc/defines.h new file mode 100644 index 00000000..edc50563 --- /dev/null +++ b/Src/h264dec/ldecod/inc/defines.h @@ -0,0 +1,273 @@ + +/*! + ************************************************************************** + * \file defines.h + * + * \brief + * Header file containing some useful global definitions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Karsten Sühring <suehring@hhi.de> + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + * + * \date + * 21. March 2001 + ************************************************************************** + */ + + +#ifndef H264_DEFINES_H_ +#define H264_DEFINES_H_ +#pragma once + +#if defined _DEBUG +# define TRACE 0 //!< 0:Trace off 1:Trace on 2:detailed CABAC context information +#else +# define TRACE 0 //!< 0:Trace off 1:Trace on 2:detailed CABAC context information +#endif + +#define JM "16.1 (FRExt)" +#define VERSION "16.1" +#define EXT_VERSION "(FRExt)" + +#define DUMP_DPB 0 //!< Dump DPB info for debug purposes +#define PAIR_FIELDS_IN_OUTPUT 0 //!< Pair field pictures for output purposes +#define IMGTYPE 0 //!< Define imgpel size type. 0 implies byte (cannot handle >8 bit depths) and 1 implies unsigned short +#define ENABLE_FIELD_CTX 1 //!< Enables Field mode related context types for CABAC +#define ENABLE_HIGH444_CTX 1 //!< Enables High 444 profile context types for CABAC. 
+#define ZEROSNR 0 //!< PSNR computation method +#define ENABLE_OUTPUT_TONEMAPPING 1 //!< enable tone map the output if tone mapping SEI present + + +#include "typedefs.h" + + +#define H264_MEMORY_ALIGNMENT 32 + +//#define MAX_NUM_SLICES 150 +#define MAX_NUM_SLICES 50 +#define MAX_REFERENCE_PICTURES 32 //!< H.264 allows 32 fields +#define MAX_CODED_FRAME_SIZE 8000000 //!< bytes for one frame + +//AVC Profile IDC definitions +#define BASELINE 66 //!< YUV 4:2:0/8 "Baseline" +#define MAIN 77 //!< YUV 4:2:0/8 "Main" +#define EXTENDED 88 //!< YUV 4:2:0/8 "Extended" +#define FREXT_HP 100 //!< YUV 4:2:0/8 "High" +#define FREXT_Hi10P 110 //!< YUV 4:2:0/10 "High 10" +#define FREXT_Hi422 122 //!< YUV 4:2:2/10 "High 4:2:2" +#define FREXT_Hi444 244 //!< YUV 4:4:4/14 "High 4:4:4" +#define FREXT_CAVLC444 44 //!< YUV 4:4:4/14 "CAVLC 4:4:4" + + +#define FILE_NAME_SIZE 255 +#define INPUT_TEXT_SIZE 1024 + +#if (ENABLE_HIGH444_CTX == 1) +# define NUM_BLOCK_TYPES 22 +#else +# define NUM_BLOCK_TYPES 10 +#endif + + +//#define _LEAKYBUCKET_ + +#define BLOCK_SHIFT 2 +#define BLOCK_SIZE 4 +#define BLOCK_SIZE_8x8 8 +#define SMB_BLOCK_SIZE 8 +#define BLOCK_PIXELS 16 +#define MB_BLOCK_SIZE 16 +#define MB_PIXELS 256 // MB_BLOCK_SIZE * MB_BLOCK_SIZE +#define MB_PIXELS_SHIFT 8 // log2(MB_BLOCK_SIZE * MB_BLOCK_SIZE) +#define MB_BLOCK_SHIFT 4 +#define BLOCK_MULTIPLE 4 // (MB_BLOCK_SIZE/BLOCK_SIZE) +#define MB_BLOCK_PARTITIONS 16 // (BLOCK_MULTIPLE * BLOCK_MULTIPLE) +#define BLOCK_CONTEXT 64 // (4 * MB_BLOCK_PARTITIONS) + +// These variables relate to the subpel accuracy supported by the software (1/4) +#define BLOCK_SIZE_SP 16 // BLOCK_SIZE << 2 +#define BLOCK_SIZE_8x8_SP 32 // BLOCK_SIZE8x8 << 2 + +// Available MB modes +enum { + PSKIP = 0, + BSKIP_DIRECT = 0, + P16x16 = 1, + P16x8 = 2, + P8x16 = 3, + SMB8x8 = 4, + SMB8x4 = 5, + SMB4x8 = 6, + SMB4x4 = 7, + P8x8 = 8, + I4MB = 9, + I16MB = 10, + IBLOCK = 11, + SI4MB = 12, + I8MB = 13, + IPCM = 14, + MAXMODE = 15 +} ;//MBModeTypes; + +// number of 
intra prediction modes +#define NO_INTRA_PMODE 9 + +// Direct Mode types +enum { + DIR_TEMPORAL = 0, //!< Temporal Direct Mode + DIR_SPATIAL = 1 //!< Spatial Direct Mode +} ;//DirectModes; + +// CAVLC block types +enum { + LUMA = 0, + LUMA_INTRA16x16DC = 1, + LUMA_INTRA16x16AC = 2, + CB = 3, + CB_INTRA16x16DC = 4, + CB_INTRA16x16AC = 5, + CR = 8, + CR_INTRA16x16DC = 9, + CR_INTRA16x16AC = 10 +} ;//CAVLCBlockTypes; + +// CABAC block types +enum { + LUMA_16DC = 0, + LUMA_16AC = 1, + LUMA_8x8 = 2, + LUMA_8x4 = 3, + LUMA_4x8 = 4, + LUMA_4x4 = 5, + CHROMA_DC = 6, + CHROMA_AC = 7, + CHROMA_DC_2x4 = 8, + CHROMA_DC_4x4 = 9, + CB_16DC = 10, + CB_16AC = 11, + CB_8x8 = 12, + CB_8x4 = 13, + CB_4x8 = 14, + CB_4x4 = 15, + CR_16DC = 16, + CR_16AC = 17, + CR_8x8 = 18, + CR_8x4 = 19, + CR_4x8 = 20, + CR_4x4 = 21 +} ;//CABACBlockTypes; + +// Macro defines +#define Q_BITS 15 +#define DQ_BITS 6 +#define Q_BITS_8 16 +#define DQ_BITS_8 6 + +/* IS_INTRA: bitmask membership test on mb_type; evaluates to 1 for I4MB, I16MB, IPCM, I8MB and SI4MB and to 0 otherwise. The shift 1 << mb_type requires mb_type to be non-negative and smaller than the bit width of int. The commented-out line is the equivalent comparison chain. */ +//#define IS_INTRA(MB) ((MB)->mb_type==I4MB || (MB)->mb_type==I16MB ||(MB)->mb_type==IPCM || (MB)->mb_type==I8MB || (MB)->mb_type==SI4MB) +#define IS_INTRA(MB) (!!((1 << (MB)->mb_type) & ((1<<I4MB) | (1<<I16MB) | (1<<IPCM) | (1<<I8MB) | (1<<SI4MB)))) +#define IS_I16MB(MB) ((MB)->mb_type==I16MB || (MB)->mb_type==IPCM) + +#define IS_INTER(MB) (!IS_INTRA(MB)) +//#define IS_INTER(MB) ((MB)->mb_type!=SI4MB && (MB)->mb_type!=I4MB && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB && (MB)->mb_type!=IPCM) +/* IS_INTERMV excludes I4MB, I16MB, I8MB, IPCM and mb_type 0 but, unlike IS_INTRA, does not exclude SI4MB -- NOTE(review): confirm that is intended. */ +#define IS_INTERMV(MB) ((MB)->mb_type!=I4MB && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB && (MB)->mb_type!=0 && (MB)->mb_type!=IPCM) +/* IS_DIRECT and IS_SKIP read a variable named currSlice from the scope in which the macro is expanded; it is not a macro parameter. */ +#define IS_DIRECT(MB) ((MB)->mb_type==0 && (currSlice->slice_type == B_SLICE )) +#define IS_SKIP(MB) ((MB)->mb_type==0 && (currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE)) + +#define TOTRUN_NUM 15 +#define RUNBEFORE_NUM 7 +#define RUNBEFORE_NUM_M1 6 + +// Quantization parameter range +#define MIN_QP 0 +#define MAX_QP 51 +// 4x4 intra prediction modes +enum { + VERT_PRED = 0, + HOR_PRED = 1, + DC_PRED = 
2, + DIAG_DOWN_LEFT_PRED = 3, + DIAG_DOWN_RIGHT_PRED = 4, + VERT_RIGHT_PRED = 5, + HOR_DOWN_PRED = 6, + VERT_LEFT_PRED = 7, + HOR_UP_PRED = 8 +} ;//I4x4PredModes; + +// 16x16 intra prediction modes +enum { + VERT_PRED_16 = 0, + HOR_PRED_16 = 1, + DC_PRED_16 = 2, + PLANE_16 = 3 +} ;//I16x16PredModes; + +// 8x8 chroma intra prediction modes +enum { + DC_PRED_8 = 0, + HOR_PRED_8 = 1, + VERT_PRED_8 = 2, + PLANE_8 = 3 +} ;//I8x8PredModes; + +enum { + EOS = 1, //!< End Of Sequence + SOP = 2, //!< Start Of Picture + SOS = 3 //!< Start Of Slice +}; + +// MV Prediction types +enum { + MVPRED_MEDIAN = 0, + MVPRED_L = 1, + MVPRED_U = 2, + MVPRED_UR = 3 +} ;//MVPredTypes; + +enum { + DECODING_OK = 0, + SEARCH_SYNC = 1, + PICTURE_DECODED = 2 +}; + +#define LAMBDA_ACCURACY_BITS 16 +#define INVALIDINDEX (-135792468) + +#define RC_MAX_TEMPORAL_LEVELS 5 + +//Start code and Emulation Prevention need this to be defined in identical manner at encoder and decoder +#define ZEROBYTES_SHORTSTARTCODE 2 //indicates the number of zero bytes in the short start-code prefix + +#define MAX_PLANE 3 +#define IS_INDEPENDENT(IMG) ((IMG)->separate_colour_plane_flag) +/* IS_FREXT_PROFILE: true when profile_idc is at least FREXT_HP or equals FREXT_CAVLC444. NOTE(review): the argument is expanded without parentheses and evaluated twice, so pass only a plain variable or constant. */ +#define IS_FREXT_PROFILE(profile_idc) ( profile_idc>=FREXT_HP || profile_idc == FREXT_CAVLC444 ) +/* HI_INTRA_ONLY_PROFILE takes no argument; it reads p_Vid->active_sps from the scope in which the macro is expanded. */ +#define HI_INTRA_ONLY_PROFILE (((p_Vid->active_sps->profile_idc>=FREXT_Hi10P)&&(p_Vid->active_sps->constrained_set3_flag))||(p_Vid->active_sps->profile_idc==FREXT_CAVLC444)) + +/* VUI_AR_* constants; the trailing comment on each entry gives the ratio it stands for. */ +enum +{ + VUI_AR_UNDEFINED = 0, + VUI_AR_SQUARE = 1, // 1:1 + VUI_AR_12_11 = 2, // 12:11 + VUI_AR_10_11 = 3, // 10:11 + VUI_AR_16_11 = 4, // 16:11 + VUI_AR_40_33 = 5, // 40:33 + VUI_AR_24_11 = 6, // 24:11 + VUI_AR_20_11 = 7, // 20:11 + VUI_AR_32_11 = 8, // 32:11 + VUI_AR_80_33 = 9, // 80:33 + VUI_AR_18_11 = 10, // 18:11 + VUI_AR_15_11 = 11, // 15:11 + VUI_AR_64_33 = 12, // 64:33 +VUI_AR_160_99 = 13, // 160:99 +VUI_AR_4_3 = 14, // 4:3 +VUI_AR_3_2 = 15, // 3:2 +VUI_AR_2_1 = 16, // 2:1 + + + VUI_EXTENDED_SAR = 255, +}; +#endif + diff --git 
a/Src/h264dec/ldecod/inc/elements.h b/Src/h264dec/ldecod/inc/elements.h new file mode 100644 index 00000000..f115bff6 --- /dev/null +++ b/Src/h264dec/ldecod/inc/elements.h @@ -0,0 +1,112 @@ + +/*! + ************************************************************************************* + * \file elements.h + * + * \brief + * Header file for elements in H.264 streams + * + * \date + * 6.10.2000 + * + * \version + * 1.0 + * + * \author + * Sebastian Purreiter <sebastian.purreiter@mch.siemens.de> \n + * Siemens AG, Information and Communication Mobile \n + * P.O.Box 80 17 07 \n + * D-81617 Munich, Germany \n + ************************************************************************************* + */ + +#ifndef _ELEMENTS_H_ +#define _ELEMENTS_H_ + +/*! + * definition of H.264 syntax elements + * order of elements follow dependencies for picture reconstruction + */ +/*! + * \brief Assignment of old TYPE partition elements to new + * elements + * + * old element | new elements + * ----------------+------------------------------------------------------------------- + * TYPE_HEADER | SE_HEADER, SE_PTYPE + * TYPE_MBHEADER | SE_MBTYPE, SE_REFFRAME, SE_INTRAPREDMODE + * TYPE_MVD | SE_MVD + * TYPE_CBP | SE_CBP_INTRA, SE_CBP_INTER + * SE_DELTA_QUANT_INTER + * SE_DELTA_QUANT_INTRA + * TYPE_COEFF_Y | SE_LUM_DC_INTRA, SE_LUM_AC_INTRA, SE_LUM_DC_INTER, SE_LUM_AC_INTER + * TYPE_2x2DC | SE_CHR_DC_INTRA, SE_CHR_DC_INTER + * TYPE_COEFF_C | SE_CHR_AC_INTRA, SE_CHR_AC_INTER + * TYPE_EOS | SE_EOS +*/ + +#define SE_HEADER 0 +#define SE_PTYPE 1 +#define SE_MBTYPE 2 +#define SE_REFFRAME 3 +#define SE_INTRAPREDMODE 4 +#define SE_MVD 5 +#define SE_CBP_INTRA 6 +#define SE_LUM_DC_INTRA 7 +#define SE_CHR_DC_INTRA 8 +#define SE_LUM_AC_INTRA 9 +#define SE_CHR_AC_INTRA 10 +#define SE_CBP_INTER 11 +#define SE_LUM_DC_INTER 12 +#define SE_CHR_DC_INTER 13 +#define SE_LUM_AC_INTER 14 +#define SE_CHR_AC_INTER 15 +#define SE_DELTA_QUANT_INTER 16 +#define SE_DELTA_QUANT_INTRA 17 +#define SE_BFRAME 18 +#define 
SE_EOS 19 +#define SE_MAX_ELEMENTS 20 + + +#define NO_EC 0 //!< no error concealment necessary +#define EC_REQ 1 //!< error concealment required +#define EC_SYNC 2 //!< search and sync on next header element + +#define MAXPARTITIONMODES 2 //!< maximum possible partition modes as defined in assignSE2partition[][] + +/*! + * \brief lookup-table to assign different elements to partition + * + * \note here we defined up to 6 different partitions similar to + * document Q15-k-18 described in the PROGFRAMEMODE. + * NOTE(review): the table below only uses partition indices 0 to 2. + * The Sliceheader contains the PSYNC information. \par + * + * Elements inside a partition are not ordered. They are + * ordered by occurrence in the stream. + * Assumption: Only partition losses are considered. \par + * + * The texture elements luminance and chrominance are + * not ordered in the progressive form + * This may be changed in image.c \par + * + * We also defined the proposed internet partition mode + * of Stephan Wenger here. To select the desired mode + * uncomment one of the two following lines. \par + * NOTE(review): both rows are present in the table as written; + * the row index selects the mode (MAXPARTITIONMODES rows). + * + * -IMPORTANT: + * Picture- or Sliceheaders must be assigned to partition 0. \par + * Furthermore partitions must follow syntax dependencies as + * outlined in document Q15-J-23. + */ + + +static const byte assignSE2partition[][SE_MAX_ELEMENTS] = +{ + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 // element number (do not uncomment) + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, //!< all elements in one partition no data partitioning + { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0 } //!< three partitions per slice +}; + + +#endif + diff --git a/Src/h264dec/ldecod/inc/erc_api.h b/Src/h264dec/ldecod/inc/erc_api.h new file mode 100644 index 00000000..428c4ed3 --- /dev/null +++ b/Src/h264dec/ldecod/inc/erc_api.h @@ -0,0 +1,159 @@ + +/*! 
+ ************************************************************************ + * \file erc_api.h + * + * \brief + * External (still inside video decoder) interface for error concealment module + * + * \author + * - Ari Hourunranta <ari.hourunranta@nokia.com> + * - Ye-Kui Wang <wyk@ieee.org> + * - Jill Boyce <jill.boyce@thomson.net> + * - Saurav K Bandyopadhyay <saurav@ieee.org> + * - Zhenyu Wu <Zhenyu.Wu@thomson.net> + * - Purvin Pandit <Purvin.Pandit@thomson.net> + * + * ************************************************************************ + */ + + +#ifndef _ERC_API_H_ +#define _ERC_API_H_ + +#include "erc_globals.h" + +/* +* Defines +*/ + +/* If the average motion vector of the correctly received macroblocks is less than the +threshold, concealByCopy is used, otherwise concealByTrial is used. */ +#define MVPERMB_THR 8 + +/* used to determine the size of the allocated memory for a temporal Region (MB) */ +#define DEF_REGION_SIZE 384 /* 8*8*6 */ + +#define ERC_BLOCK_OK 3 +#define ERC_BLOCK_CONCEALED 2 +#define ERC_BLOCK_CORRUPTED 1 +#define ERC_BLOCK_EMPTY 0 + + +/* +* Macros to convert MBNum representation to blockNum +*/ + +#define xPosYBlock(currYBlockNum,picSizeX) \ +((currYBlockNum)%((picSizeX)>>3)) + +#define yPosYBlock(currYBlockNum,picSizeX) \ +((currYBlockNum)/((picSizeX)>>3)) + +#define xPosMB(currMBNum,picSizeX) \ +((currMBNum)%((picSizeX)>>4)) + +#define yPosMB(currMBNum,picSizeX) \ +((currMBNum)/((picSizeX)>>4)) + +#define MBxy2YBlock(currXPos,currYPos,comp,picSizeX) \ +((((currYPos)<<1)+((comp)>>1))*((picSizeX)>>3)+((currXPos)<<1)+((comp)&1)) + +#define MBNum2YBlock(currMBNum,comp,picSizeX) \ +MBxy2YBlock(xPosMB((currMBNum),(picSizeX)),yPosMB((currMBNum),(picSizeX)),(comp),(picSizeX)) + + +/* +* typedefs +*/ + +/* segment data structure */ +typedef struct ercSegment_s +{ + int startMBPos; + int endMBPos; + int fCorrupted; +} ercSegment_t; + +/* Error detector & concealment instance data structure */ +typedef struct ercVariables_s +{ + /* Number of 
macroblocks (size or size/4 of the arrays) */ + int nOfMBs; + /* Number of segments (slices) in frame */ + int nOfSegments; + + /* Array for conditions of Y blocks */ + int *yCondition; + /* Array for conditions of U blocks */ + int *uCondition; + /* Array for conditions of V blocks */ + int *vCondition; + + /* Array for Slice level information */ + ercSegment_t *segments; + int currSegment; + + /* Conditions of the MBs of the previous frame */ + int *prevFrameYCondition; + + /* Flag telling if the current segment was found to be corrupted */ + int currSegmentCorrupted; + /* Counter for corrupted segments per picture */ + int nOfCorruptedSegments; + + /* State variables for error detector and concealer */ + int concealment; + +} ercVariables_t; + +/* +* External function interface +*/ + +void ercInit(VideoParameters *p_Vid, int pic_sizex, int pic_sizey, int flag); +ercVariables_t *ercOpen( void ); +void ercReset( ercVariables_t *errorVar, int nOfMBs, int numOfSegments, int picSizeX ); +void ercClose( VideoParameters *p_Vid, ercVariables_t *errorVar ); +void ercSetErrorConcealment( ercVariables_t *errorVar, int value ); + +void ercStartSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar ); +void ercStopSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar ); +void ercMarkCurrSegmentLost(int picSizeX, ercVariables_t *errorVar ); +void ercMarkCurrSegmentOK(int picSizeX, ercVariables_t *errorVar ); +void ercMarkCurrMBConcealed( int currMBNum, int comp, int picSizeX, ercVariables_t *errorVar ); + +int ercConcealIntraFrame( VideoParameters *p_Vid, frame *recfr, int picSizeX, int picSizeY, ercVariables_t *errorVar ); +int ercConcealInterFrame( frame *recfr, objectBuffer_t *object_list, + int picSizeX, int picSizeY, ercVariables_t *errorVar, int chroma_format_idc ); + + +/* Thomson APIs for concealing entire frame loss */ + +#include "mbuffer.h" +#include "output.h" + +struct concealment_node { + StorablePicture* 
picture; + int missingpocs; + struct concealment_node *next; +}; + +extern struct concealment_node * init_node(StorablePicture* , int ); +extern void print_node( struct concealment_node * ); +extern void print_list( struct concealment_node * ); +extern void init_lists_for_non_reference_loss(VideoParameters *p_Vid, int , PictureStructure ); + +extern void conceal_non_ref_pics(VideoParameters *p_Vid, int diff); +extern void conceal_lost_frames(VideoParameters *p_Vid); + +extern void sliding_window_poc_management(DecodedPictureBuffer *p_Dpb, StorablePicture *p); + +extern void write_lost_non_ref_pic(VideoParameters *p_Vid, int poc); +extern void write_lost_ref_after_idr(VideoParameters *p_Vid, int pos); + +extern int comp(const void *, const void *); + + +#endif + diff --git a/Src/h264dec/ldecod/inc/erc_do.h b/Src/h264dec/ldecod/inc/erc_do.h new file mode 100644 index 00000000..9879222f --- /dev/null +++ b/Src/h264dec/ldecod/inc/erc_do.h @@ -0,0 +1,44 @@ + +/*! + ************************************************************************ + * \file erc_do.h + * + * \brief + * Header for the I & P frame error concealment common functions + * + * \author + * - Viktor Varsa <viktor.varsa@nokia.com> + * - Ye-Kui Wang <wyk@ieee.org> + * + ************************************************************************ + */ + +#ifndef _ERC_DO_H_ +#define _ERC_DO_H_ + + +#include "erc_api.h" + +void ercPixConcealIMB (VideoParameters *p_Vid, imgpel *currFrame, int row, int column, int predBlocks[], int frameWidth, int mbWidthInBlocks); + +int ercCollect8PredBlocks( int predBlocks[], int currRow, int currColumn, int *condition, + int maxRow, int maxColumn, int step, byte fNoCornerNeigh ); +int ercCollectColumnBlocks( int predBlocks[], int currRow, int currColumn, int *condition, int maxRow, int maxColumn, int step ); + +#define isSplitted(object_list,currMBNum) \ + ((object_list+((currMBNum)<<2))->regionMode >= REGMODE_SPLITTED) + +/* this can be used as isBlock(...,INTRA) or 
isBlock(...,INTER_COPY) */ +#define isBlock(object_list,currMBNum,comp,regMode) \ + (isSplitted(object_list,currMBNum) ? \ + ((object_list+((currMBNum)<<2)+(comp))->regionMode == REGMODE_##regMode##_8x8) : \ + ((object_list+((currMBNum)<<2))->regionMode == REGMODE_##regMode)) + +/* this can be used as getParam(...,mv) or getParam(...,xMin) or getParam(...,yMin) */ +#define getParam(object_list,currMBNum,comp,param) \ + (isSplitted(object_list,currMBNum) ? \ + ((object_list+((currMBNum)<<2)+(comp))->param) : \ + ((object_list+((currMBNum)<<2))->param)) + +#endif + diff --git a/Src/h264dec/ldecod/inc/erc_globals.h b/Src/h264dec/ldecod/inc/erc_globals.h new file mode 100644 index 00000000..63ba4e2e --- /dev/null +++ b/Src/h264dec/ldecod/inc/erc_globals.h @@ -0,0 +1,52 @@ + +/*! + ************************************************************************ + * \file erc_globals.h + * + * \brief + * global header file for error concealment module + * + * \author + * - Viktor Varsa <viktor.varsa@nokia.com> + * - Ye-Kui Wang <wyk@ieee.org> + ************************************************************************ + */ + +#ifndef _ERC_GLOBALS_H_ +#define _ERC_GLOBALS_H_ + +#include "defines.h" + +/* "block" means an 8x8 pixel area */ + +/* Region modes */ +#define REGMODE_INTER_COPY 0 //!< Copy region +#define REGMODE_INTER_PRED 1 //!< Inter region with motion vectors +#define REGMODE_INTRA 2 //!< Intra region +#define REGMODE_SPLITTED 3 //!< Any region mode higher than this indicates that the region + //!< is splitted which means 8x8 block +#define REGMODE_INTER_COPY_8x8 4 +#define REGMODE_INTER_PRED_8x8 5 +#define REGMODE_INTRA_8x8 6 + +//! YUV pixel domain image arrays for a video frame +typedef struct frame_s +{ + VideoParameters *p_Vid; + imgpel *yptr; + imgpel *uptr; + imgpel *vptr; +} frame; + +//! 
region structure stores information about a region that is needed for concealment +typedef struct object_buffer +{ + byte regionMode; //!< region mode as above + int xMin; //!< X coordinate of the pixel position of the top-left corner of the region + int yMin; //!< Y coordinate of the pixel position of the top-left corner of the region + short mv[3]; //!< motion vectors in 1/4 pixel units: mvx = mv[0], mvy = mv[1], + //!< and ref_frame = mv[2] +} objectBuffer_t; + +#endif + diff --git a/Src/h264dec/ldecod/inc/errorconcealment.h b/Src/h264dec/ldecod/inc/errorconcealment.h new file mode 100644 index 00000000..36650e25 --- /dev/null +++ b/Src/h264dec/ldecod/inc/errorconcealment.h @@ -0,0 +1,20 @@ + + +/*! + **************************************************************************** + * \file errorconcealment.h + * + * \brief + * Header file for errorconcealment.c + * + **************************************************************************** + */ + +#ifndef _ERRORCONCEALMENT_H_ +#define _ERRORCONCEALMENT_H_ + +extern int set_ec_flag(VideoParameters *p_Vid, int se); +extern void reset_ec_flags(VideoParameters *p_Vid); + +#endif + diff --git a/Src/h264dec/ldecod/inc/fmo.h b/Src/h264dec/ldecod/inc/fmo.h new file mode 100644 index 00000000..df749bba --- /dev/null +++ b/Src/h264dec/ldecod/inc/fmo.h @@ -0,0 +1,30 @@ + +/*! 
+ *************************************************************************** + * + * \file fmo.h + * + * \brief + * Support for Flexible Macroblock Ordering (FMO) + * + * \date + * 19 June, 2002 + * + * \author + * Stephan Wenger stewe@cs.tu-berlin.de + **************************************************************************/ + +#ifndef _FMO_H_ +#define _FMO_H_ + + +extern int fmo_init (VideoParameters *p_Vid); +extern int FmoFinit (VideoParameters *p_Vid); + +extern int FmoGetNumberOfSliceGroup(VideoParameters *p_Vid); +extern int FmoGetLastMBOfPicture (VideoParameters *p_Vid); +extern int FmoGetLastMBInSliceGroup(VideoParameters *p_Vid, int SliceGroup); +extern int FmoGetSliceGroupId (VideoParameters *p_Vid, int mb); +extern int FmoGetNextMBNr (VideoParameters *p_Vid, int CurrentMbNr); + +#endif diff --git a/Src/h264dec/ldecod/inc/global.h b/Src/h264dec/ldecod/inc/global.h new file mode 100644 index 00000000..6d2677e6 --- /dev/null +++ b/Src/h264dec/ldecod/inc/global.h @@ -0,0 +1,1230 @@ + +/*! + ************************************************************************ + * \file + * global.h + * \brief + * global definitions for H.264 decoder. 
+ * \author + * Copyright (C) 1999 Telenor Satellite Services,Norway + * Ericsson Radio Systems, Sweden + * + * Inge Lille-Langoy <inge.lille-langoy@telenor.com> + * + * Telenor Satellite Services + * Keysers gt.13 tel.: +47 23 13 86 98 + * N-0130 Oslo,Norway fax.: +47 22 77 79 80 + * + * Rickard Sjoberg <rickard.sjoberg@era.ericsson.se> + * + * Ericsson Radio Systems + * KI/ERA/T/VV + * 164 80 Stockholm, Sweden + * + ************************************************************************ + */ +#ifndef _GLOBAL_H_ +#define _GLOBAL_H_ + +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <assert.h> +#include <time.h> +#include <sys/timeb.h> + +#include <bfc/platform/types.h> +#include "win32.h" +#include "defines.h" +#include "ifunctions.h" +#include "parsetcommon.h" +#include "types.h" +#include "frame.h" +#include "nalucommon.h" +#include "memcache.h" +#include <mmintrin.h> +#ifdef H264_IPP +//#include "../tools/staticlib/ipp_px.h" +#include "ippdefs.h" +#include "ippcore.h" +#include "ipps.h" +#include "ippi.h" +#include "ippvc.h" +#endif +/* benski> not the best place for this but it works for now */ +#ifdef _M_IX86 +// byte count i must be a positive multiple of 32 (not just 16): memzero_cache32 and memzero_fast32 store 32 bytes per pass and test the count only afterwards +#pragma warning(disable: 4799) +static inline void memzero_cache32(void *dst, unsigned long i) +{ + + __asm { + pxor mm0, mm0 + mov edi, dst + +loopwrite: + + movq 0[edi], mm0 + movq 8[edi], mm0 + movq 16[edi], mm0 + movq 24[edi], mm0 + lea edi, [edi+32] + sub i, 32 + jg loopwrite + + } +} + +static inline void memzero_fast32(void *dst, unsigned long i) +{ + + __asm { + pxor mm0, mm0 + mov edi, dst + +loopwrite: + + movntq 0[edi], mm0 + movntq 8[edi], mm0 + movntq 16[edi], mm0 + movntq 24[edi], mm0 + + lea edi, [edi+32] + sub i, 32 + jg loopwrite + + } +} + +static inline void memzero64(void *dst) +{ + __asm { + pxor mm0, mm0 + mov edi, dst + + movq 0[edi], mm0 + movq 8[edi], mm0 + movq 16[edi], mm0 + movq 24[edi], mm0 + movq 32[edi], mm0 + movq 40[edi], mm0 + movq 48[edi], mm0 + movq 56[edi], mm0 + 
} +} + +static inline void memzero128(void *dst) +{ + __asm { + pxor mm0, mm0 + mov edi, dst + + movq 0[edi], mm0 + movq 8[edi], mm0 + movq 16[edi], mm0 + movq 24[edi], mm0 + movq 32[edi], mm0 + movq 40[edi], mm0 + movq 48[edi], mm0 + movq 56[edi], mm0 + movq 64[edi], mm0 + movq 72[edi], mm0 + movq 80[edi], mm0 + movq 88[edi], mm0 + movq 96[edi], mm0 + movq 104[edi], mm0 + movq 112[edi], mm0 + movq 120[edi], mm0 + } +} + +static inline void memzero24(void *dst) +{ + __asm { + pxor mm0, mm0 + mov edi, dst + + movq 0[edi], mm0 + movq 8[edi], mm0 + movq 16[edi], mm0 + } +} + +static inline void memzero48(void *dst) +{ + __asm { + pxor mm0, mm0 + mov edi, dst + + movq 0[edi], mm0 + movq 8[edi], mm0 + movq 16[edi], mm0 + movq 24[edi], mm0 + movq 32[edi], mm0 + movq 40[edi], mm0 + } +} + +static inline void memzero16(void *dst) +{ + __asm { + pxor mm0, mm0 + mov edi, dst + + movq 0[edi], mm0 + movq 8[edi], mm0 + } +} + +static inline void memzero8(void *dst) +{ + __asm { + pxor mm0, mm0 + mov edi, dst + + movq 0[edi], mm0 + } +} + +static inline void memset_fast_end() +{ + _mm_empty(); +} + +// Very optimized memcpy() routine for all AMD Athlon and Duron family. +// This code uses any of FOUR different basic copy methods, depending +// on the transfer size. +// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or +// "Streaming Store"), and also uses the software prefetchnta instructions, +// be sure youre running on Athlon/Duron or other recent CPU before calling! + +#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy +// The smallest copy uses the X86 "movsd" instruction, in an optimized +// form which is an "unrolled loop". + +#define IN_CACHE_COPY 64 * 1024 // upper limit for movq/movq copy w/SW prefetch +// Next is a copy that uses the MMX registers to copy 8 bytes at a time, +// also using the "unrolled loop" optimization. This code uses +// the software prefetch instruction to get the data into the cache. 
+ +#define UNCACHED_COPY 197 * 1024 // upper limit for movq/movntq w/SW prefetch +// For larger blocks, which will spill beyond the cache, its faster to +// use the Streaming Store instruction MOVNTQ. This write instruction +// bypasses the cache and writes straight to main memory. This code also +// uses the software prefetch instruction to pre-read the data. +// USE 64 * 1024 FOR THIS VALUE IF YOURE ALWAYS FILLING A "CLEAN CACHE" + +#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch +#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch +// For the largest size blocks, a special technique called Block Prefetch +// can be used to accelerate the read operations. Block Prefetch reads +// one address per cache line, for a series of cache lines, in a short loop. +// This is faster than using software prefetch. The technique is great for +// getting maximum read bandwidth, especially in DDR memory systems. + +// Inline assembly syntax for use with Visual C++ + + +static void * memcpy_amd(void *dest, const void *src, size_t n) +{ + __asm { + + mov ecx, [n] // number of bytes to copy + mov edi, [dest] // destination + mov esi, [src] // source + mov ebx, ecx // keep a copy of count + + cld + cmp ecx, TINY_BLOCK_COPY + jb $memcpy_ic_3 // tiny? skip mmx copy + + cmp ecx, 32*1024 // dont align between 32k-64k because + jbe $memcpy_do_align // it appears to be slower + cmp ecx, 64*1024 + jbe $memcpy_align_done +$memcpy_do_align: + mov ecx, 8 // a trick thats faster than rep movsb... 
+ sub ecx, edi // align destination to qword + and ecx, 111b // get the low bits + sub ebx, ecx // update copy count + neg ecx // set up to jump into the array + add ecx, offset $memcpy_align_done + jmp ecx // jump to array of movsbs + +align 4 + movsb + movsb + movsb + movsb + movsb + movsb + movsb + movsb + +$memcpy_align_done: // destination is dword aligned + mov ecx, ebx // number of bytes left to copy + shr ecx, 6 // get 64-byte block count + jz $memcpy_ic_2 // finish the last few bytes + + cmp ecx, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy + jae $memcpy_uc_test + +// This is small block copy that uses the MMX registers to copy 8 bytes +// at a time. It uses the "unrolled loop" optimization, and also uses +// the software prefetch instruction to get the data into the cache. +align 16 +$memcpy_ic_1: // 64-byte block copies, in-cache copy + + prefetchnta [esi + (200*64/34+192)] // start reading ahead + + movq mm0, [esi+0] // read 64 bits + movq mm1, [esi+8] + movq [edi+0], mm0 // write 64 bits + movq [edi+8], mm1 // note: the normal movq writes the + movq mm2, [esi+16] // data to cache// a cache line will be + movq mm3, [esi+24] // allocated as needed, to store the data + movq [edi+16], mm2 + movq [edi+24], mm3 + movq mm0, [esi+32] + movq mm1, [esi+40] + movq [edi+32], mm0 + movq [edi+40], mm1 + movq mm2, [esi+48] + movq mm3, [esi+56] + movq [edi+48], mm2 + movq [edi+56], mm3 + + add esi, 64 // update source pointer + add edi, 64 // update destination pointer + dec ecx // count down + jnz $memcpy_ic_1 // last 64-byte block? + +$memcpy_ic_2: + mov ecx, ebx // has valid low 6 bits of the byte count +$memcpy_ic_3: + shr ecx, 2 // dword count + and ecx, 1111b // only look at the "remainder" bits + neg ecx // set up to jump into the array + add ecx, offset $memcpy_last_few + jmp ecx // jump to array of movsds + +$memcpy_uc_test: + cmp ecx, UNCACHED_COPY/64 // big enough? 
use block prefetch copy + jae $memcpy_bp_1 + +$memcpy_64_test: + or ecx, ecx // _tail end of block prefetch will jump here + jz $memcpy_ic_2 // no more 64-byte blocks left + +// For larger blocks, which will spill beyond the cache, its faster to +// use the Streaming Store instruction MOVNTQ. This write instruction +// bypasses the cache and writes straight to main memory. This code also +// uses the software prefetch instruction to pre-read the data. +align 16 +$memcpy_uc_1: // 64-byte blocks, uncached copy + + prefetchnta [esi + (200*64/34+192)] // start reading ahead + + movq mm0,[esi+0] // read 64 bits + add edi,64 // update destination pointer + movq mm1,[esi+8] + add esi,64 // update source pointer + movq mm2,[esi-48] + movntq [edi-64], mm0 // write 64 bits, bypassing the cache + movq mm0,[esi-40] // note: movntq also prevents the CPU + movntq [edi-56], mm1 // from READING the destination address + movq mm1,[esi-32] // into the cache, only to be over-written + movntq [edi-48], mm2 // so that also helps performance + movq mm2,[esi-24] + movntq [edi-40], mm0 + movq mm0,[esi-16] + movntq [edi-32], mm1 + movq mm1,[esi-8] + movntq [edi-24], mm2 + movntq [edi-16], mm0 + dec ecx + movntq [edi-8], mm1 + jnz $memcpy_uc_1 // last 64-byte block? + + jmp $memcpy_ic_2 // almost done + +// For the largest size blocks, a special technique called Block Prefetch +// can be used to accelerate the read operations. Block Prefetch reads +// one address per cache line, for a series of cache lines, in a short loop. +// This is faster than using software prefetch, in this case. +// The technique is great for getting maximum read bandwidth, +// especially in DDR memory systems. +$memcpy_bp_1: // large blocks, block prefetch copy + + cmp ecx, CACHEBLOCK // big enough to run another prefetch loop? 
+ jl $memcpy_64_test // no, back to regular uncached copy + + mov eax, CACHEBLOCK / 2 // block prefetch loop, unrolled 2X + add esi, CACHEBLOCK * 64 // move to the top of the block +align 16 +$memcpy_bp_2: + mov edx, [esi-64] // grab one address per cache line + mov edx, [esi-128] // grab one address per cache line + sub esi, 128 // go reverse order + dec eax // count down the cache lines + jnz $memcpy_bp_2 // keep grabbing more lines into cache + + mov eax, CACHEBLOCK // now that its in cache, do the copy +align 16 +$memcpy_bp_3: + movq mm0, [esi ] // read 64 bits + movq mm1, [esi+ 8] + movq mm2, [esi+16] + movq mm3, [esi+24] + movq mm4, [esi+32] + movq mm5, [esi+40] + movq mm6, [esi+48] + movq mm7, [esi+56] + add esi, 64 // update source pointer + movntq [edi ], mm0 // write 64 bits, bypassing cache + movntq [edi+ 8], mm1 // note: movntq also prevents the CPU + movntq [edi+16], mm2 // from READING the destination address + movntq [edi+24], mm3 // into the cache, only to be over-written, + movntq [edi+32], mm4 // so that also helps performance + movntq [edi+40], mm5 + movntq [edi+48], mm6 + movntq [edi+56], mm7 + add edi, 64 // update dest pointer + + dec eax // count down + + jnz $memcpy_bp_3 // keep copying + sub ecx, CACHEBLOCK // update the 64-byte block count + jmp $memcpy_bp_1 // keep processing chunks + +// The smallest copy uses the X86 "movsd" instruction, in an optimized +// form which is an "unrolled loop". Then it handles the last few bytes. 
+align 4 + movsd + movsd // perform last 1-15 dword copies + movsd + movsd + movsd + movsd + movsd + movsd + movsd + movsd // perform last 1-7 dword copies + movsd + movsd + movsd + movsd + movsd + movsd + +$memcpy_last_few: // dword aligned from before movsds + mov ecx, ebx // has valid low 2 bits of the byte count + and ecx, 11b // the last few cows must come home + jz $memcpy_final // no more, lets leave + rep movsb // the last 1, 2, or 3 bytes + +$memcpy_final: +// emms // clean up the MMX state + sfence // flush the write buffer + mov eax, [dest] // ret value = destination pointer + + } +} + +#elif defined(_M_X64) +static inline void memzero24(void *dst) +{ + int32_t j; + int32_t *d = (int32_t *)dst; + for (j=0;j<24;j+=4) + { + d[j] = 0; + } +} +static inline void memset_fast_end() {} +#else +static inline void memzero_fast16(void *dst, unsigned long i) +{ + int32_t j; + int32_t *d = (int32_t *)dst; + for (j=0;j<i;j+=4) + { + d[j] = 0; + } +} +static inline void memzero24(void *dst) +{ + int32_t j; + int32_t *d = (int32_t *)dst; + for (j=0;j<24;j+=4) + { + d[j] = 0; + } +} +static inline void memset_fast_end() {} +#endif + +#define UNDEFINED_REFERENCE ((int)0x80000000) +typedef int32_t h264_ref_t; + +#define ET_SIZE 300 //!< size of error text buffer +extern char errortext[ET_SIZE]; //!< buffer for error message for exit with error() +extern int sse2_flag, mmx_flag, sse_flag, sse3_flag, sse4_1_flag; +/*********************************************************************** + * T y p e d e f i n i t i o n s f o r J M + *********************************************************************** + */ + +typedef enum +{ + LumaComp = 0, + CrComp = 1, + CbComp = 2 +} Color_Component; + +/*********************************************************************** + * D a t a t y p e s f o r C A B A C + *********************************************************************** + */ + +typedef struct pix_pos +{ + int available; + int mb_addr; + short x; + short y; + short pos_x; + 
short pos_y; +} PixelPos; + +//! struct to characterize the state of the arithmetic coding engine +typedef struct +{ + unsigned int Drange; + unsigned int Dvalue; + int DbitsLeft; + byte *Dcodestrm; + int *Dcodestrm_len; +} DecodingEnvironment; + +typedef DecodingEnvironment *DecodingEnvironmentPtr; + +typedef short MotionVector[2]; + +//! definition of motion parameters +typedef struct pic_motion +{ + h264_ref_t ref_pic_id; + h264_ref_t ref_id; + MotionVector mv; + char ref_idx; +} PicMotion; + +// TODO: benski> might be more efficient to make a [list][subblock_y][subblock_x] array of these values instead of parallel arrays +typedef struct motion_params +{ + PicMotion **motion[2]; + byte ** moving_block; +} MotionParams; + +//! struct for context management +typedef struct +{ + uint16_t state; // index into state-table CP + unsigned char MPS; // Most Probable Symbol 0/1 CP + unsigned char dummy; // for alignment +} BiContextType; + +typedef BiContextType *BiContextTypePtr; + + +/********************************************************************** + * C O N T E X T S F O R T M L S Y N T A X E L E M E N T S + ********************************************************************** + */ + +#define NUM_MB_TYPE_CTX 11 +#define NUM_B8_TYPE_CTX 9 +#define NUM_MV_RES_CTX 10 +#define NUM_REF_NO_CTX 6 +#define NUM_DELTA_QP_CTX 4 +#define NUM_MB_AFF_CTX 4 +#define NUM_TRANSFORM_SIZE_CTX 3 + +// structures that will be declared somewhere else +struct storable_picture; +struct datapartition; +struct syntaxelement; + +typedef struct +{ + BiContextType mb_type_contexts [3][NUM_MB_TYPE_CTX]; + BiContextType b8_type_contexts [2][NUM_B8_TYPE_CTX]; + BiContextType mv_res_contexts [2][NUM_MV_RES_CTX]; + BiContextType ref_no_contexts [2][NUM_REF_NO_CTX]; + BiContextType delta_qp_contexts[NUM_DELTA_QP_CTX]; + BiContextType mb_aff_contexts [NUM_MB_AFF_CTX]; +} MotionInfoContexts; + +#define NUM_IPR_CTX 2 +#define NUM_CIPR_CTX 4 +#define NUM_CBP_CTX 4 +#define NUM_BCBP_CTX 4 +#define 
NUM_MAP_CTX 15 +#define NUM_LAST_CTX 15 +#define NUM_ONE_CTX 5 +#define NUM_ABS_CTX 5 + + +typedef struct +{ + BiContextType transform_size_contexts [NUM_TRANSFORM_SIZE_CTX]; + BiContextType ipr_contexts [NUM_IPR_CTX]; + BiContextType cipr_contexts[NUM_CIPR_CTX]; + BiContextType cbp_contexts [3][NUM_CBP_CTX]; + BiContextType bcbp_contexts[NUM_BLOCK_TYPES][NUM_BCBP_CTX]; + BiContextType map_contexts [2][NUM_BLOCK_TYPES][NUM_MAP_CTX+1]; // +1 for better alignment + BiContextType last_contexts[2][NUM_BLOCK_TYPES][NUM_LAST_CTX+1]; // +1 for better alignment + BiContextType one_contexts [NUM_BLOCK_TYPES][NUM_ONE_CTX]; + BiContextType abs_contexts [NUM_BLOCK_TYPES][NUM_ABS_CTX]; +} TextureInfoContexts; + + +//*********************** end of data type definition for CABAC ******************* + +/*********************************************************************** + * N e w D a t a t y p e s f o r T M L + *********************************************************************** + */ + +/*! Buffer structure for decoded reference picture marking commands */ +typedef struct DecRefPicMarking_s +{ + int memory_management_control_operation; + int difference_of_pic_nums_minus1; + int long_term_pic_num; + int long_term_frame_idx; + int max_long_term_frame_idx_plus1; + struct DecRefPicMarking_s *Next; +} DecRefPicMarking_t; + + +//! definition of pic motion parameters +typedef struct pic_motion_params2 +{ + h264_ref_t ref_pic_id; //!< reference picture identifier [list][subblock_y][subblock_x] + h264_ref_t ref_id; //!< reference picture identifier [list][subblock_y][subblock_x] + short mv[2]; //!< motion vector [list][subblock_x][subblock_y][component] + char ref_idx; //!< reference picture [list][subblock_y][subblock_x] + byte mb_field; //!< field macroblock indicator + byte field_frame; //!< indicates if co_located is field or frame. +} PicMotionParams2; + +//! 
Macroblock +typedef struct macroblock +{ + struct slice *p_Slice; //!< pointer to the current slice + struct img_par *p_Vid; //!< pointer to VideoParameters + struct inp_par *p_Inp; + int mbAddrX; //!< current MB address + int mb_x; + int mb_y; + int block_x; + int block_y; + int block_y_aff; + int pix_x; + int pix_y; + int pix_c_x; + int pix_c_y; + + int subblock_x; + int subblock_y; + + int qp; //!< QP luma + int qpc[2]; //!< QP chroma + int qp_scaled[MAX_PLANE]; //!< QP scaled for all comps. + Boolean is_lossless; + Boolean is_intra_block; + Boolean is_v_block; + + short slice_nr; + short delta_quant; //!< for rate control + + struct macroblock *mb_up; //!< pointer to neighboring MB (CABAC) + struct macroblock *mb_left; //!< pointer to neighboring MB (CABAC) + + // some storage of macroblock syntax elements for global access + int mb_type; + short mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2]; //!< indices correspond to [forw,backw][block_y][block_x][x,y] + int cbp; + int64 cbp_blk [3]; + int64 cbp_bits [3]; + int64 cbp_bits_8x8[3]; + + int i16mode; + char b8mode[4]; + char b8pdir[4]; + char ei_flag; //!< error indicator flag that enables concealment + char dpl_flag; //!< error indicator flag that signals a missing data partition + char ipmode_DPCM; + + short DFDisableIdc; + short DFAlphaC0Offset; + short DFBetaOffset; + + char c_ipred_mode; //!< chroma intra prediction mode + Boolean mb_field; + + int skip_flag; + + int mb_addr_left, mb_addr_up, mb_addr_upper_right, mb_addr_upper_left; + Boolean mb_avail_left, mb_avail_up, mb_avail_upper_right, mb_avail_upper_left; + + Boolean luma_transform_size_8x8_flag; + Boolean NoMbPartLessThan8x8Flag; + + void (*itrans_8x8)(struct macroblock *currMB, ColorPlane pl, int ioff, int joff); + + void (*GetMVPredictor) (struct macroblock *currMB, PixelPos *block, + short pmv[2], short ref_frame, struct pic_motion **motion, int mb_x, int mb_y, int blockshape_x, int blockshape_y); + + int (*read_and_store_CBP_block_bit) (struct 
macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type); + char (*readRefPictureIdx) (struct syntaxelement *currSE, struct datapartition *dP, int list); + +} Macroblock; + +//! Syntaxelement +typedef struct syntaxelement +{ + int value1; //!< numerical value of syntax element + int value2; //!< for blocked symbols, e.g. run/level + int len; //!< length of code + //int inf; //!< info part of CAVLC code + +#if TRACE + #define TRACESTRING_SIZE 100 //!< size of trace string + char tracestring[TRACESTRING_SIZE]; //!< trace string +#endif + + //! for mapping of CAVLC to syntaxElement + void (*mapping)(int len, int info, int *value1, int *value2); +} SyntaxElement; + + +//! Bitstream +typedef struct +{ + // CABAC Decoding + int read_len; //!< actual position in the codebuffer, CABAC only + int code_len; //!< overall codebuffer length, CABAC only + // CAVLC Decoding + int frame_bitoffset; //!< actual position in the codebuffer, bit-oriented, CAVLC only + int bitstream_length; //!< overall codebuffer length, byte oriented, CAVLC only + + byte *streamBuffer; //!< actual codebuffer for read bytes +} Bitstream; + + +/* === 4x4 block typedefs === */ +// 32 bit precision +typedef int h264_int_block_row_t[BLOCK_SIZE]; +typedef h264_int_block_row_t h264_int_block_t[BLOCK_SIZE]; +// 16 bit precision +typedef int16_t h264_short_block_row_t[BLOCK_SIZE]; +typedef h264_short_block_row_t h264_short_block_t[BLOCK_SIZE]; +// 8 bit precision (no 4x4 imgpel typedef is declared here) + +/* === 8x8 block typedefs === */ +// 32 bit precision +typedef int h264_int_8x8block_row_t[BLOCK_SIZE_8x8]; +typedef h264_int_8x8block_row_t h264_int_8x8block_t[BLOCK_SIZE_8x8]; +// 16 bit precision +typedef int16_t h264_short_8x8block_row_t[BLOCK_SIZE_8x8]; +typedef h264_short_8x8block_row_t h264_short_8x8block_t[BLOCK_SIZE_8x8]; +// 8 bit precision +typedef imgpel h264_imgpel_8x8block_row_t[BLOCK_SIZE_8x8]; +typedef h264_imgpel_8x8block_row_t h264_imgpel_8x8block_t[BLOCK_SIZE_8x8]; + +/* === 16x16 block typedefs === */ +// 32 bit precision 
+typedef int h264_int_macroblock_row_t[MB_BLOCK_SIZE]; +typedef h264_int_macroblock_row_t h264_int_macroblock_t[MB_BLOCK_SIZE]; +// 16 bit precision +typedef int16_t h264_short_macroblock_row_t[MB_BLOCK_SIZE]; +typedef h264_short_macroblock_row_t h264_short_macroblock_t[MB_BLOCK_SIZE]; +// 8 bit precision +typedef imgpel h264_imgpel_macroblock_row_t[MB_BLOCK_SIZE]; +typedef h264_imgpel_macroblock_row_t h264_imgpel_macroblock_t[MB_BLOCK_SIZE]; + + + + +typedef int h264_pic_position[2]; +typedef byte h264_4x4_byte[BLOCK_SIZE][BLOCK_SIZE]; +typedef h264_4x4_byte h264_nz_coefficient[3]; + +//! DataPartition +typedef struct datapartition +{ + + Bitstream *bitstream; + DecodingEnvironment de_cabac; + +} DataPartition; + +//! Slice +typedef struct slice +{ + struct img_par *p_Vid; + struct inp_par *p_Inp; + pic_parameter_set_rbsp_t *active_pps; + seq_parameter_set_rbsp_t *active_sps; + + struct colocated_params *p_colocated; + struct colocated_params *Co_located_JV[MAX_PLANE]; //!< p_colocated to be used during 4:4:4 independent mode decoding + + int mb_aff_frame_flag; + int direct_spatial_mv_pred_flag; //!< Indicator for direct mode type (1 for Spatial, 0 for Temporal) + int num_ref_idx_l0_active; //!< number of available list 0 references + int num_ref_idx_l1_active; //!< number of available list 1 references + + int qp; + int slice_qp_delta; + int qs; + int slice_qs_delta; + int slice_type; //!< slice type + int model_number; //!< cabac model number + PictureStructure structure; //!< Identify picture structure type + int start_mb_nr; //!< MUST be set by NAL even in case of ei_flag == 1 + int max_part_nr; + int dp_mode; //!< data partitioning mode + int last_dquant; + + // int last_mb_nr; //!< only valid when entropy coding == CABAC + DataPartition *partArr; //!< array of partitions + MotionInfoContexts *mot_ctx; //!< pointer to struct of context models for use in CABAC + TextureInfoContexts *tex_ctx; //!< pointer to struct of context models for use in CABAC + + int 
mvscale[6][MAX_REFERENCE_PICTURES]; + + int ref_pic_list_reordering_flag_l0; + int *reordering_of_pic_nums_idc_l0; + int *abs_diff_pic_num_minus1_l0; + int *long_term_pic_idx_l0; + int ref_pic_list_reordering_flag_l1; + int *reordering_of_pic_nums_idc_l1; + int *abs_diff_pic_num_minus1_l1; + int *long_term_pic_idx_l1; + + + short DFDisableIdc; //!< Disable deblocking filter on slice + short DFAlphaC0Offset; //!< Alpha and C0 offset for filtering slice + short DFBetaOffset; //!< Beta offset for filtering slice + + int pic_parameter_set_id; //!<the ID of the picture parameter set the slice is reffering to + + int dpB_NotPresent; //!< non-zero, if data partition B is lost + int dpC_NotPresent; //!< non-zero, if data partition C is lost + + + __declspec(align(32)) h264_imgpel_macroblock_t mb_pred[MAX_PLANE]; + __declspec(align(32)) h264_imgpel_macroblock_t mb_rec[MAX_PLANE]; + __declspec(align(32)) union + { + __declspec(align(32)) h264_short_8x8block_t mb_rres8[MAX_PLANE][4]; + __declspec(align(32)) h264_short_macroblock_t cof[MAX_PLANE]; + __declspec(align(32)) h264_short_block_t cof4[MAX_PLANE][16]; // TODO: get this to work, one of these days + __declspec(align(32)) h264_short_macroblock_t ipcm[MAX_PLANE]; + }; + + int cofu[16]; + + // Scaling matrix info + int InvLevelScale4x4_Intra[3][6][4][4]; + int InvLevelScale4x4_Inter[3][6][4][4]; + int InvLevelScale8x8_Intra[3][6][64]; + int InvLevelScale8x8_Inter[3][6][64]; + + int *qmatrix[12]; + + // Cabac + // TODO: we could optimize coefficient reading by storing the levels/runs instead of coefficients + int16_t coeff[64]; // one more for EOB + int coeff_ctr; + int pos; + + //weighted prediction + unsigned int apply_weights; + unsigned int luma_log2_weight_denom; + unsigned int chroma_log2_weight_denom; + int wp_weight[2][MAX_REFERENCE_PICTURES][3]; // weight in [list][index][component] order + int wp_offset[6][MAX_REFERENCE_PICTURES][3]; // offset in [list][index][component] order + int 
wbp_weight[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; //weight in [list][fw_index][bw_index][component] order + int wp_round_luma; + int wp_round_chroma; + + void (*read_CBP_and_coeffs_from_NAL) (Macroblock *currMB); + int (*decode_one_component ) (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, struct storable_picture *dec_picture); + int (*readSlice ) (struct img_par *, struct inp_par *); + int (*nal_startcode_follows ) (struct slice*, int ); + void (*read_motion_info_from_NAL) (Macroblock *currMB); + void (*read_one_macroblock ) (Macroblock *currMB); + void (*interpret_mb_mode ) (Macroblock *currMB); + void (*compute_colocated ) (struct slice *currSlice, struct colocated_params *p, struct storable_picture **listX[6]); + + void (*linfo_cbp_intra) (int len,int info,int *cbp, int *dummy); + void (*linfo_cbp_inter) (int len,int info,int *cbp, int *dummy); +} Slice; + +//****************************** ~DM *********************************** + +// image parameters +typedef struct img_par +{ + struct inp_par *p_Inp; + pic_parameter_set_rbsp_t *active_pps; + seq_parameter_set_rbsp_t *active_sps; + seq_parameter_set_rbsp_t SeqParSet[MAXSPS]; + pic_parameter_set_rbsp_t PicParSet[MAXPPS]; + + struct sei_params *p_SEI; + + struct old_slice_par *old_slice; + int number; //!< frame number + unsigned int current_mb_nr; // bitstream order + unsigned int num_dec_mb; + short current_slice_nr; + int *intra_block; + + int qp; //!< quant for the current frame + + int sp_switch; //!< 1 for switching sp, 0 for normal sp + int type; //!< image type INTER/INTRA + int width; + int height; + int width_cr; //!< width chroma + int height_cr; //!< height chroma + int mb_x; + int mb_y; + int block_x; + int block_y; + int pix_c_x; + int pix_c_y; + + int allrefzero; + + byte **ipredmode; //!< prediction type [90][74] + h264_nz_coefficient *nz_coeff; + int **siblock; + int cod_counter; //!< Current count of number of skipped macroblocks in a row + + int 
structure; //!< Identify picture structure type + + Slice *currentSlice; //!< pointer to current Slice data struct + Macroblock *mb_data; //!< array containing all MBs of a whole frame + Macroblock *mb_data_JV[MAX_PLANE]; //!< mb_data to be used for 4:4:4 independent mode + int colour_plane_id; //!< colour_plane_id of the current coded slice + int ChromaArrayType; + + // For MB level frame/field coding + int mb_aff_frame_flag; + + // for signalling to the neighbour logic that this is a deblocker call + int DeblockCall; + byte mixedModeEdgeFlag; + + // picture error concealment + // concealment_head points to first node in list, concealment_end points to + // last node in list. Initialize both to NULL, meaning no nodes in list yet + struct concealment_node *concealment_head; + struct concealment_node *concealment_end; + + DecRefPicMarking_t *dec_ref_pic_marking_buffer; //!< stores the memory management control operations + + int num_ref_idx_l0_active; //!< number of forward reference + int num_ref_idx_l1_active; //!< number of backward reference + + int slice_group_change_cycle; + + int redundant_pic_cnt; + + unsigned int pre_frame_num; //!< store the frame_num in the last decoded slice. For detecting gap in frame_num. + int non_conforming_stream; + + // End JVT-D101 + // POC200301: from unsigned int to int + int toppoc; //poc for this top field // POC200301 + int bottompoc; //poc of bottom field of frame + int framepoc; //poc of this frame // POC200301 + unsigned int frame_num; //frame_num for this frame + unsigned int field_pic_flag; + byte bottom_field_flag; + + //the following is for slice header syntax elements of poc + // for poc mode 0. + unsigned int pic_order_cnt_lsb; + int delta_pic_order_cnt_bottom; + // for poc mode 1. 
+ int delta_pic_order_cnt[3]; + + // //////////////////////// + // for POC mode 0: + signed int PrevPicOrderCntMsb; + unsigned int PrevPicOrderCntLsb; + signed int PicOrderCntMsb; + + // for POC mode 1: + unsigned int AbsFrameNum; + signed int ExpectedPicOrderCnt, PicOrderCntCycleCnt, FrameNumInPicOrderCntCycle; + unsigned int PreviousFrameNum, FrameNumOffset; + int ExpectedDeltaPerPicOrderCntCycle; + int PreviousPOC, ThisPOC; + int PreviousFrameNumOffset; + // ///////////////////////// + + int idr_flag; + int nal_reference_idc; //!< nal_reference_idc from NAL unit + + int idr_pic_id; + + int MaxFrameNum; + + unsigned int PicWidthInMbs; + unsigned int PicHeightInMapUnits; + unsigned int FrameHeightInMbs; + unsigned int PicHeightInMbs; + unsigned int PicSizeInMbs; + unsigned int FrameSizeInMbs; + unsigned int oldFrameSizeInMbs; + + int no_output_of_prior_pics_flag; + int long_term_reference_flag; + int adaptive_ref_pic_buffering_flag; + + int last_has_mmco_5; + int last_pic_bottom_field; + + // Fidelity Range Extensions Stuff + short bitdepth_luma; + short bitdepth_chroma; + int bitdepth_scale[2]; + int bitdepth_luma_qp_scale; + int bitdepth_chroma_qp_scale; + unsigned int dc_pred_value_comp[MAX_PLANE]; //!< component value for DC prediction (depends on component pel bit depth) + int max_pel_value_comp[MAX_PLANE]; //!< max value that one picture element (pixel) can take (depends on pic_unit_bitdepth) + int Transform8x8Mode; + int profile_idc; + int yuv_format; + int lossless_qpprime_flag; + int num_blk8x8_uv; + int num_uv_blocks; + int num_cdc_coeff; + int mb_cr_size_x; + int mb_cr_size_y; + int mb_cr_size_x_blk; + int mb_cr_size_y_blk; + int mb_size[3][2]; //!< component macroblock dimensions + int mb_size_blk[3][2]; //!< component macroblock dimensions + int mb_size_shift[3][2]; + int subpel_x; + int subpel_y; + int shiftpel_x; + int shiftpel_y; + + int max_vmv_r; //!< maximum vertical motion vector range in luma quarter frame pixel units for the current level_idc 
+ int max_mb_vmv_r; //!< maximum vertical motion vector range in luma quarter pixel units for the current level_idc + + // picture error concealment + int last_ref_pic_poc; + int ref_poc_gap; + int poc_gap; + int earlier_missing_poc; + unsigned int frame_to_conceal; + int IDR_concealment_flag; + int conceal_slice_type; + + // random access point decoding + int recovery_point; + int recovery_point_found; + int recovery_frame_cnt; + int recovery_frame_num; + int recovery_poc; + + int separate_colour_plane_flag; + + int frame_number; + int init_bl_done; + + // Redundant slices. Should be moved to another structure and allocated only if extended profile + unsigned int previous_frame_num; //!< frame number of previous slice + int ref_flag[17]; //!< 0: i-th previous frame is incorrect + //!< non-zero: i-th previous frame is correct + int Is_primary_correct; //!< if primary frame is correct, 0: incorrect + int Is_redundant_correct; //!< if redundant frame is correct, 0:incorrect + int redundant_slice_ref_idx; //!< reference index of redundant slice + + //FILE *p_log; //!< SNR file + int LastAccessUnitExists; + int NALUCount; + + Boolean global_init_done; + + int *qp_per_matrix; + int *qp_rem_matrix; + + struct frame_store *last_out_fs; + int pocs_in_dpb[100]; + + + struct storable_picture *dec_picture; + struct storable_picture *dec_picture_JV[MAX_PLANE]; //!< dec_picture to be used during 4:4:4 independent mode decoding + struct storable_picture *no_reference_picture; //!< dummy storable picture for recovery point + struct storable_picture **listX[6]; + + // Error parameters + struct object_buffer *erc_object_list; + struct ercVariables_s *erc_errorVar; + + int erc_mvperMB; + struct img_par *erc_img; + int ec_flag[SE_MAX_ELEMENTS]; //!< array to set errorconcealment + + struct memory_input_struct *mem_input; + + struct frame_store *out_buffer; + + struct storable_picture *pending_output; + int pending_output_state; + int recovery_flag; + + // dpb + struct 
decoded_picture_buffer *p_Dpb; + + char listXsize[6]; + // report + char cslice_type[9]; + // FMO + int *MbToSliceGroupMap; + int *MapUnitToSliceGroupMap; + int NumberOfSliceGroups; // the number of slice groups -1 (0 == scan order, 7 == maximum) + +#if (ENABLE_OUTPUT_TONEMAPPING) + struct tone_mapping_struct_s *seiToneMapping; +#endif + + // benski> buffer of storable pictures ready for output. + // might be able to optimize a tad by making a ringbuffer, but I doubt it matters + struct storable_picture **out_pictures; + size_t size_out_pictures; + size_t num_out_pictures; + + ImageCache image_cache[2]; // [0] is luma [1] is chroma (shared for both planes) + MotionCache motion_cache; + + h264_pic_position *PicPos; //! Helper array to access macroblock positions. + + NALU_t *nalu; // a cache so we don't re-alloc every time + + void (*getNeighbour) (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); + void (*getNeighbourPX_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); + void (*getNeighbourXP_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); + void (*getNeighbourLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourPXLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourXPLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourLeftLuma)(const Macroblock *currMB, PixelPos *pix); + void (*getNeighbourNXLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN<0, yN full range + void (*getNeighbourLeft) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN<0, yN=0 + void (*getNeighbourUp) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN=0, yN<0 + void (*getNeighbourNX) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN full range + void (*getNeighbourNP) (const Macroblock *currMB, int yN, const 
int mb_size[2], PixelPos *pix); // xN<0, yN>=0 + void (*getNeighbourNPChromaNB)(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0 + void (*getNeighbour0X) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN=0, yN full range + void (*getNeighbour0XLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN=0, yN full range + void (*getNeighbourX0) (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix); // xN full range, yN = 0 + void (*getNeighbourUpLuma) (const Macroblock *currMB, PixelPos *pix); // xN=0, yN<0 + void (*getNeighbourNPLumaNB)(const Macroblock *currMB, int yN, PixelPos *pix); + void (*getNeighbourPXLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourPXLumaNB_NoPos)(const Macroblock *currMB, int yN, PixelPos *pix); + void (*getNeighbourPPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourXPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourXPLumaNB_NoPos)(const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*get_mb_block_pos) (const h264_pic_position *PicPos, int mb_addr, short *x, short *y); + void (*GetStrength) (byte Strength[16], Macroblock *MbQ, int dir,int edge, int mvlimit, struct storable_picture *p); + void (*EdgeLoopLuma) (ColorPlane pl, struct video_image *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, struct storable_picture *p); + void (*EdgeLoopChroma) (struct video_image *image, byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, struct storable_picture *p); +} VideoParameters; + +// input parameters from configuration file +typedef struct inp_par +{ + int intra_profile_deblocking; //!< Loop filter usage determined by flags and parameters in bitstream + + // Output sequence format related variables + FrameFormat output; //!< output related information + +#ifdef _LEAKYBUCKET_ + unsigned long R_decoder; 
//!< Decoder Rate in HRD Model + unsigned long B_decoder; //!< Decoder Buffer size in HRD model + unsigned long F_decoder; //!< Decoder Initial buffer fullness in HRD model + char LeakyBucketParamFile[FILE_NAME_SIZE]; //!< LeakyBucketParamFile +#endif + + // picture error concealment + int ref_poc_gap; + int poc_gap; +} InputParameters; + +typedef struct old_slice_par +{ + unsigned field_pic_flag; + unsigned frame_num; + int nal_ref_idc; + unsigned pic_oder_cnt_lsb; + int delta_pic_oder_cnt_bottom; + int delta_pic_order_cnt[2]; + byte bottom_field_flag; + byte idr_flag; + int idr_pic_id; + int pps_id; +} OldSliceParams; + +typedef struct decoder_params +{ + InputParameters *p_Inp; //!< Input Parameters + VideoParameters *p_Vid; //!< Image Parameters + +} DecoderParams; + +#ifdef TRACE +extern FILE *p_trace; //!< Trace file +extern int bitcounter; +#endif + +// prototypes + +extern void error(char *text, int code); + +// dynamic mem allocation +extern int init_global_buffers(VideoParameters *p_Vid); +extern void free_global_buffers(VideoParameters *p_Vid); + +extern int RBSPtoSODB(byte *streamBuffer, int last_byte_pos); +extern int EBSPtoRBSP(byte *streamBuffer, int end_bytepos); + +void FreePartition (DataPartition *dp, int n); +DataPartition *AllocPartition(int n); + +void tracebits(const char *trace_str, int len, int info,int value1); +void tracebits2(const char *trace_str, int len, int info); + +unsigned CeilLog2 ( unsigned uiVal); +unsigned CeilLog2_sf( unsigned uiVal); + +// For 4:4:4 independent mode +extern void change_plane_JV( VideoParameters *p_Vid, int nplane ); +extern void make_frame_picture_JV(VideoParameters *p_Vid); + + +#endif + + diff --git a/Src/h264dec/ldecod/inc/header.h b/Src/h264dec/ldecod/inc/header.h new file mode 100644 index 00000000..f3185b07 --- /dev/null +++ b/Src/h264dec/ldecod/inc/header.h @@ -0,0 +1,22 @@ +/*! 
+ ************************************************************************************* + * \file header.h + * + * \brief + * Prototypes for header.c + ************************************************************************************* + */ + +#ifndef _HEADER_H_ +#define _HEADER_H_ + +extern void FirstPartOfSliceHeader(Slice *currSlice); +extern void RestOfSliceHeader (Slice *currSlice); + +extern void dec_ref_pic_marking(VideoParameters *p_Vid, Bitstream *currStream); + +extern void decode_poc(VideoParameters *p_Vid); +extern int dumppoc(VideoParameters *p_Vid); + +#endif + diff --git a/Src/h264dec/ldecod/inc/image.h b/Src/h264dec/ldecod/inc/image.h new file mode 100644 index 00000000..2540f3c5 --- /dev/null +++ b/Src/h264dec/ldecod/inc/image.h @@ -0,0 +1,33 @@ + +/*! + ************************************************************************ + * \file image.h + * + * \brief + * prototypes for image.c + * + ************************************************************************ + */ + +#ifndef _IMAGE_H_ +#define _IMAGE_H_ + +#include "mbuffer.h" + +extern int picture_order(VideoParameters *p_Vid); + +extern void decode_one_slice (Slice *currSlice); + +extern void exit_picture(VideoParameters *p_Vid, StorablePicture **dec_picture); +extern int decode_one_frame(VideoParameters *p_Vid, uint64_t time_code); + +extern int is_new_picture(StorablePicture *dec_picture, Slice *currSlice, OldSliceParams *p_old_slice); +extern void init_old_slice(OldSliceParams *p_old_slice); +// For 4:4:4 independent mode +extern void copy_dec_picture_JV( VideoParameters *p_Vid, StorablePicture *dst, StorablePicture *src ); + +extern void frame_postprocessing(VideoParameters *p_Vid); +extern void field_postprocessing(VideoParameters *p_Vid); + +#endif + diff --git a/Src/h264dec/ldecod/inc/intra16x16_pred.h b/Src/h264dec/ldecod/inc/intra16x16_pred.h new file mode 100644 index 00000000..6680280a --- /dev/null +++ b/Src/h264dec/ldecod/inc/intra16x16_pred.h @@ -0,0 +1,25 @@ +/*! 
+ ************************************************************************************* + * \file intra16x16_pred.h + * + * \brief + * definitions for intra 16x16 prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ + +#ifndef _INTRA16x16_PRED_H_ +#define _INTRA16x16_PRED_H_ + +#include "global.h" +#include "mbuffer.h" + +extern int intrapred16x16(Macroblock *currMB, ColorPlane pl, int b8); + +#endif + diff --git a/Src/h264dec/ldecod/inc/intra4x4_pred.h b/Src/h264dec/ldecod/inc/intra4x4_pred.h new file mode 100644 index 00000000..a50c5262 --- /dev/null +++ b/Src/h264dec/ldecod/inc/intra4x4_pred.h @@ -0,0 +1,25 @@ +/*! + ************************************************************************************* + * \file intra4x4_pred.h + * + * \brief + * definitions for intra 4x4 prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ + +#ifndef _INTRA4x4_PRED_H_ +#define _INTRA4x4_PRED_H_ + +#include "global.h" +#include "mbuffer.h" + +extern int intrapred(Macroblock *currMB, ColorPlane pl, int ioff, int joff, int img_block_x, int img_block_y); + +#endif + diff --git a/Src/h264dec/ldecod/inc/intra8x8_pred.h b/Src/h264dec/ldecod/inc/intra8x8_pred.h new file mode 100644 index 00000000..ff238460 --- /dev/null +++ b/Src/h264dec/ldecod/inc/intra8x8_pred.h @@ -0,0 +1,25 @@ +/*! 
+ ************************************************************************************* + * \file intra8x8_pred.h + * + * \brief + * definitions for intra 8x8 prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ + +#ifndef _INTRA8x8_PRED_H_ +#define _INTRA8x8_PRED_H_ + +#include "global.h" +#include "mbuffer.h" + +extern int intrapred8x8(Macroblock *currMB, ColorPlane pl, int ioff, int joff); + +#endif + diff --git a/Src/h264dec/ldecod/inc/leaky_bucket.h b/Src/h264dec/ldecod/inc/leaky_bucket.h new file mode 100644 index 00000000..ad605a6a --- /dev/null +++ b/Src/h264dec/ldecod/inc/leaky_bucket.h @@ -0,0 +1,26 @@ + +/*! + ************************************************************************************* + * \file leaky_bucket.h + * + * \brief + * Header for Leaky Buffer parameters + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Shankar Regunathan <shanre@microsoft.com> + ************************************************************************************* + */ +#ifndef _LEAKY_BUCKET_H_ +#define _LEAKY_BUCKET_H_ + +#include "global.h" + +#ifdef _LEAKYBUCKET_ +// Leaky Bucket functions +unsigned long GetBigDoubleWord(FILE *fp); +void calc_buffer(InputParameters *p_Inp); +#endif + +#endif + diff --git a/Src/h264dec/ldecod/inc/loopfilter.h b/Src/h264dec/ldecod/inc/loopfilter.h new file mode 100644 index 00000000..c8b739fa --- /dev/null +++ b/Src/h264dec/ldecod/inc/loopfilter.h @@ -0,0 +1,23 @@ +/*! 
+ ************************************************************************ + * \file + * loopfilter.h + * \brief + * external deblocking filter interface + ************************************************************************ + */ + +#ifndef _LOOPFILTER_H_ +#define _LOOPFILTER_H_ + +#include "global.h" +#include "mbuffer.h" + +extern void DeblockPicture(VideoParameters *p_Vid, StorablePicture *p) ; + + +extern void EdgeLoopLumaNormal_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p); +extern void EdgeLoopLumaNormal_Horiz_sse2(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p); +extern void EdgeLoopChromaNormal_Horiz(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p); + +#endif //_LOOPFILTER_H_ diff --git a/Src/h264dec/ldecod/inc/macroblock.h b/Src/h264dec/ldecod/inc/macroblock.h new file mode 100644 index 00000000..d11547a4 --- /dev/null +++ b/Src/h264dec/ldecod/inc/macroblock.h @@ -0,0 +1,177 @@ +/*! + ************************************************************************ + * \file macroblock.h + * + * \brief + * Arrays for macroblock encoding + * + * \author + * Inge Lille-Langoy <inge.lille-langoy@telenor.com> + * Copyright (C) 1999 Telenor Satellite Services, Norway + ************************************************************************ + */ + +#ifndef _MACROBLOCK_H_ +#define _MACROBLOCK_H_ + +#include "global.h" +#include "mbuffer.h" +#include "block.h" + +//! single scan pattern +static const byte SNGL_SCAN[16][2] = +{ + {0,0},{1,0},{0,1},{0,2}, + {1,1},{2,0},{3,0},{2,1}, + {1,2},{0,3},{1,3},{2,2}, + {3,1},{3,2},{2,3},{3,3} +}; + +static const byte SNGL_SCAN_1D[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}; +static const byte SNGL_SCAN_DC[16] = { 0, 1, 2, 8, 3, 4, 5, 6, 9, 10, 11, 12, 7, 13, 14, 15}; +//! 
field scan pattern +static const byte FIELD_SCAN[16][2] = +{ + {0,0},{0,1},{1,0},{0,2}, + {0,3},{1,1},{1,2},{1,3}, + {2,0},{2,1},{2,2},{2,3}, + {3,0},{3,1},{3,2},{3,3} +}; + +static const byte FIELD_SCAN_1D[16] = {0, 4, 1, 8, 12, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; +static const byte FIELD_SCAN_DC[16] = {0, 2, 1, 8, 10, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15}; +//! used to control block sizes : Not used/16x16/16x8/8x16/8x8/8x4/4x8/4x4 +static const int BLOCK_STEP[8][2]= +{ + {0,0},{4,4},{4,2},{2,4},{2,2},{2,1},{1,2},{1,1} +}; + +//! single scan pattern +static const byte SNGL_SCAN8x8[64][2] = { + {0,0}, {1,0}, {0,1}, {0,2}, {1,1}, {2,0}, {3,0}, {2,1}, {1,2}, {0,3}, {0,4}, {1,3}, {2,2}, {3,1}, {4,0}, {5,0}, + {4,1}, {3,2}, {2,3}, {1,4}, {0,5}, {0,6}, {1,5}, {2,4}, {3,3}, {4,2}, {5,1}, {6,0}, {7,0}, {6,1}, {5,2}, {4,3}, + {3,4}, {2,5}, {1,6}, {0,7}, {1,7}, {2,6}, {3,5}, {4,4}, {5,3}, {6,2}, {7,1}, {7,2}, {6,3}, {5,4}, {4,5}, {3,6}, + {2,7}, {3,7}, {4,6}, {5,5}, {6,4}, {7,3}, {7,4}, {6,5}, {5,6}, {4,7}, {5,7}, {6,6}, {7,5}, {7,6}, {6,7}, {7,7} +}; + +static const byte SNGL_SCAN8x8_1D[64] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +//! 
field scan pattern +static const byte FIELD_SCAN8x8[64][2] = { // 8x8 + {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2}, {2,0}, {1,3}, {0,5}, {0,6}, {0,7}, {1,4}, {2,1}, {3,0}, + {2,2}, {1,5}, {1,6}, {1,7}, {2,3}, {3,1}, {4,0}, {3,2}, {2,4}, {2,5}, {2,6}, {2,7}, {3,3}, {4,1}, {5,0}, {4,2}, + {3,4}, {3,5}, {3,6}, {3,7}, {4,3}, {5,1}, {6,0}, {5,2}, {4,4}, {4,5}, {4,6}, {4,7}, {5,3}, {6,1}, {6,2}, {5,4}, + {5,5}, {5,6}, {5,7}, {6,3}, {7,0}, {7,1}, {6,4}, {6,5}, {6,6}, {6,7}, {7,2}, {7,3}, {7,4}, {7,5}, {7,6}, {7,7} +}; + +static const byte FIELD_SCAN8x8_1D[64] = +{ + 0, 8, 16, 1, 9, 24, 32, 17, + 2, 25, 40, 48, 56, 33, 10, 3, + 18, 41, 49, 57, 26, 11, 4, 19, + 34, 42, 50, 58, 27, 12, 5, 20, + 35, 43, 51, 59, 28, 13, 6, 21, + 36, 44, 52, 60, 29, 14, 22, 37, + 45, 53, 61, 30, 7, 15, 38, 46, + 54, 62, 23, 31, 39, 47, 55, 63 +}; +//! single scan pattern +static const byte SCAN_YUV422[8][2] = +{ + {0,0},{0,1}, + {1,0},{0,2}, + {0,3},{1,1}, + {1,2},{1,3} +}; + +static const unsigned char cbp_blk_chroma[8][4] = +{ {16, 17, 18, 19}, + {20, 21, 22, 23}, + {24, 25, 26, 27}, + {28, 29, 30, 31}, + {32, 33, 34, 35}, + {36, 37, 38, 39}, + {40, 41, 42, 43}, + {44, 45, 46, 47} +}; + +static const unsigned char cofuv_blk_x[3][8][4] = +{ { {0, 1, 0, 1}, + {0, 1, 0, 1}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} }, + + { {0, 1, 0, 1}, + {0, 1, 0, 1}, + {0, 1, 0, 1}, + {0, 1, 0, 1}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} }, + + { {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 1, 0, 1}, + {2, 3, 2, 3} } +}; + +static const unsigned char cofuv_blk_y[3][8][4] = +{ + { { 0, 0, 1, 1}, + { 0, 0, 1, 1}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0} }, + + { { 0, 0, 1, 1}, + { 2, 2, 3, 3}, + { 0, 0, 1, 1}, + { 2, 2, 3, 3}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0} }, + + { { 0, 0, 1, 
1}, + { 0, 0, 1, 1}, + { 2, 2, 3, 3}, + { 2, 2, 3, 3}, + { 0, 0, 1, 1}, + { 0, 0, 1, 1}, + { 2, 2, 3, 3}, + { 2, 2, 3, 3}} +}; + + +extern void setup_slice_methods(Slice *currSlice); +extern void get_neighbors(Macroblock *currMB, PixelPos *block, int mb_x, int mb_y, int blockshape_x); +extern void get_neighbors0016(Macroblock *currMB, PixelPos *block); + +extern void start_macroblock (Slice *currSlice, Macroblock **currMB); +extern int decode_one_macroblock(Macroblock *currMB, StorablePicture *dec_picture); +extern Boolean exit_macroblock (Slice *currSlice, int eos_bit); + +#endif + diff --git a/Src/h264dec/ldecod/inc/mb_prediction.h b/Src/h264dec/ldecod/inc/mb_prediction.h new file mode 100644 index 00000000..cea3bd45 --- /dev/null +++ b/Src/h264dec/ldecod/inc/mb_prediction.h @@ -0,0 +1,33 @@ + +/*! + ************************************************************************************* + * \file mb_prediction.h + * + * \brief + * Functions for macroblock prediction + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + ************************************************************************************* + */ + +#ifndef _MB_PREDICTION_H_ +#define _MB_PREDICTION_H_ + +extern int mb_pred_intra4x4 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern int mb_pred_intra16x16 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern int mb_pred_intra8x8 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); + +extern void mb_pred_skip (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_sp_skip (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_p_inter8x8 (Macroblock *currMB, 
ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_p_inter16x16(Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_p_inter16x8 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_p_inter8x16 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_b_dspatial (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_b_dtemporal (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_b_inter8x8 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +extern void mb_pred_ipcm (Macroblock *currMB); + +#endif diff --git a/Src/h264dec/ldecod/inc/mbuffer.h b/Src/h264dec/ldecod/inc/mbuffer.h new file mode 100644 index 00000000..c06e9fb0 --- /dev/null +++ b/Src/h264dec/ldecod/inc/mbuffer.h @@ -0,0 +1,235 @@ + +/*! + *********************************************************************** + * \file + * mbuffer.h + * + * \brief + * Frame buffer functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Sühring <suehring@hhi.de> + * - Alexis Michael Tourapis <alexismt@ieee.org> + + * - Jill Boyce <jill.boyce@thomson.net> + * - Saurav K Bandyopadhyay <saurav@ieee.org> + * - Zhenyu Wu <Zhenyu.Wu@thomson.net> + * - Purvin Pandit <Purvin.Pandit@thomson.net> + * + *********************************************************************** + */ +#ifndef _MBUFFER_H_ +#define _MBUFFER_H_ + +#include "global.h" +#include <bfc/platform/types.h> + +#define MAX_LIST_SIZE 33 +//! 
definition of pic motion parameters + +typedef struct pic_motion_params +{ + PicMotion **motion[2]; + h264_ref_t ***field_references; + byte * mb_field; //!< field macroblock indicator + byte ** field_frame; //!< indicates if co_located is field or frame. + int padding[1]; +} PicMotionParams; + +typedef struct video_image +{ + imgpel **img; + imgpel *base_address; + size_t stride; + struct video_image *next; // for the memory cacher +} VideoImage; +//! definition of a picture (field or frame) +typedef struct storable_picture +{ + PictureStructure structure; + + int poc; + int top_poc; + int bottom_poc; + int frame_poc; + h264_ref_t ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE]; + h264_ref_t frm_ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE]; + h264_ref_t top_ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE]; + h264_ref_t bottom_ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE]; + unsigned frame_num; + unsigned recovery_frame; + + int pic_num; + int long_term_pic_num; + int long_term_frame_idx; + + byte is_long_term; + int used_for_reference; + int is_output; + int non_existing; + + short max_slice_id; + + int size_x, size_y, size_x_cr, size_y_cr; + int size_x_m1, size_y_m1, size_x_cr_m1, size_y_cr_m1; + int chroma_vector_adjustment; + int coded_frame; + int mb_aff_frame_flag; + unsigned PicWidthInMbs; + unsigned PicSizeInMbs; + + //imgpel ** imgY; //!< Y picture component + union + { + VideoImage *plane_images[3]; // to ensure array alignment + struct + { + VideoImage *imgY; + VideoImage *imgUV[2]; //!< U and V picture components + }; + }; + + struct pic_motion_params motion; //!< Motion info + struct pic_motion_params JVmotion[MAX_PLANE]; //!< Motion info for 4:4:4 independent mode decoding + + short ** slice_id; //!< reference picture [mb_x][mb_y] + + struct storable_picture *top_field; // for mb aff, if frame for referencing the top field + struct storable_picture *bottom_field; // for mb aff, if frame for referencing the bottom field + struct storable_picture *frame; 
// for mb aff, if field for referencing the combined frame + + int slice_type; + int idr_flag; + int no_output_of_prior_pics_flag; + int long_term_reference_flag; + int adaptive_ref_pic_buffering_flag; + + int chroma_format_idc; + int frame_mbs_only_flag; + int frame_cropping_flag; + int frame_cropping_rect_left_offset; + int frame_cropping_rect_right_offset; + int frame_cropping_rect_top_offset; + int frame_cropping_rect_bottom_offset; + int qp; + int chroma_qp_offset[2]; + int slice_qp_delta; + DecRefPicMarking_t *dec_ref_pic_marking_buffer; //!< stores the memory management control operations + + // picture error concealment + int concealed_pic; //indicates if this is a concealed picture + + // variables for tone mapping + int seiHasTone_mapping; + int tone_mapping_model_id; + int tonemapped_bit_depth; + imgpel* tone_mapping_lut; //!< tone mapping look up table + + int retain_count; // benski> we're going to reference count these things + uint64_t time_code; // user-passed timecode for this frame +} StorablePicture; + +//! definition a picture (field or frame) +typedef struct colocated_params +{ + int mb_adaptive_frame_field_flag; + int size_x, size_y; + byte is_long_term; + + MotionParams frame; + MotionParams top; + MotionParams bottom; + +} ColocatedParams; + +//! 
Frame Stores for Decoded Picture Buffer +typedef struct frame_store +{ + int is_used; //!< 0=empty; 1=top; 2=bottom; 3=both fields (or frame) + int is_reference; //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used + int is_long_term; //!< NOTE(review): text duplicates is_reference; presumably describes long-term reference use with the same encoding - confirm: 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used + int is_orig_reference; //!< original marking by nal_ref_idc: 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used + + int is_non_existent; + + unsigned frame_num; + unsigned recovery_frame; + + int frame_num_wrap; + int long_term_frame_idx; + int is_output; + int poc; + + // picture error concealment + int concealment_reference; + + StorablePicture *frame; + StorablePicture *top_field; + StorablePicture *bottom_field; + +} FrameStore; + + +//! Decoded Picture Buffer +typedef struct decoded_picture_buffer +{ + VideoParameters *p_Vid; + InputParameters *p_Inp; + FrameStore **fs; + FrameStore **fs_ref; + FrameStore **fs_ltref; + unsigned size; + unsigned used_size; + unsigned ref_frames_in_buffer; + unsigned ltref_frames_in_buffer; + int last_output_poc; + int max_long_term_pic_idx; + + int init_done; + int num_ref_frames; + + FrameStore *last_picture; +} DecodedPictureBuffer; + +extern void init_dpb(VideoParameters *p_Vid); +extern void free_dpb(VideoParameters *p_Vid); +extern FrameStore* alloc_frame_store(void); +extern void free_frame_store(VideoParameters *p_Vid, FrameStore* f); +extern StorablePicture* alloc_storable_picture(VideoParameters *p_Vid, PictureStructure type, int size_x, int size_y, int size_x_cr, int size_y_cr); +extern void free_storable_picture(VideoParameters *p_Vid, StorablePicture* p); +extern void store_picture_in_dpb(VideoParameters *p_Vid, StorablePicture* p); +extern void flush_dpb(VideoParameters *p_Vid); + +extern void dpb_split_field (VideoParameters *p_Vid, FrameStore *fs); +extern void dpb_combine_field(VideoParameters *p_Vid, FrameStore *fs); +extern void 
dpb_combine_field_yuv(VideoParameters *p_Vid, FrameStore *fs); + +extern void init_lists (Slice *currSlice); +extern void reorder_ref_pic_list(VideoParameters *p_Vid, StorablePicture **list, char *list_size, + int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc, + int *abs_diff_pic_num_minus1, int *long_term_pic_idx); + +extern void init_mbaff_lists(VideoParameters *p_Vid); +extern void alloc_ref_pic_list_reordering_buffer(Slice *currSlice); +extern void free_ref_pic_list_reordering_buffer(Slice *currSlice); + +extern void fill_frame_num_gap(VideoParameters *p_Vid); + +extern ColocatedParams* alloc_colocated(VideoParameters *p_Vid, int size_x, int size_y,int mb_adaptive_frame_field_flag); +extern void free_colocated(VideoParameters *p_Vid, ColocatedParams* p); +extern void compute_colocated (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]); +extern void compute_colocated_frames_mbs (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]); + +// For 4:4:4 independent mode +extern void compute_colocated_JV ( Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]); +extern void copy_storable_param_JV( VideoParameters *p_Vid, PicMotionParams *JVplane, PicMotionParams *motion ); + +// benski> decoded output pictures (note: these name the VideoParameters argument 'img', not 'p_Vid') +void out_storable_picture_get(VideoParameters *img, StorablePicture **pic); +void out_storable_picture_add(VideoParameters *img, StorablePicture *pic); +void out_storable_pictures_init(VideoParameters *img, size_t count); +void out_storable_pictures_destroy(VideoParameters *img); + +#endif + diff --git a/Src/h264dec/ldecod/inc/mc_prediction.h b/Src/h264dec/ldecod/inc/mc_prediction.h new file mode 100644 index 00000000..e3165e27 --- /dev/null +++ b/Src/h264dec/ldecod/inc/mc_prediction.h @@ -0,0 +1,61 @@ + +/*! 
+ ************************************************************************************* + * \file mc_prediction.h + * + * \brief + * definitions for motion compensated prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ + +#ifndef _MC_PREDICTION_H_ +#define _MC_PREDICTION_H_ + +#include "global.h" +#include "mbuffer.h" + +extern void get_block_luma (Macroblock *currMB, ColorPlane pl, StorablePicture *list, int x_pos, int y_pos, const short *motion_vector, int ver_block_size, int hor_block_size, h264_imgpel_macroblock_t block); + +extern void intra_cr_decoding (Macroblock *currMB, int yuv); +extern void prepare_direct_params(Macroblock *currMB, StorablePicture *dec_picture, short pmvl0[2], short pmvl1[2],char *l0_rFrame, char *l1_rFrame); +extern void perform_mc (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int block_size_x, int block_size_y, int curr_mb_field); +extern void perform_mc16x16 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int list_offset, int curr_mb_field); +extern void perform_mc16x8 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field); +extern void perform_mc8x8 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field); +extern void perform_mc8x16 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field); + +void weighted_mc_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); +void weighted_mc_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int 
wp_offset, int weight_denom); +void weighted_mc_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); + +void weighted_mc_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); +void weighted_mc_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); +void weighted_mc_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); + +void weighted_mc_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); +void weighted_mc_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); +void weighted_mc_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); + +void weighted_bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); +void weighted_bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); +void weighted_bi_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); + +void weighted_bi_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); +void weighted_bi_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); +void weighted_bi_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int 
weight_denom); + +void weighted_bi_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); +void weighted_bi_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); +void weighted_bi_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); + +void bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0); +void bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0); // NOTE(review): no bi_prediction8x8_c is declared in this header, unlike the weighted_* families above + +#endif + diff --git a/Src/h264dec/ldecod/inc/meminput.h b/Src/h264dec/ldecod/inc/meminput.h new file mode 100644 index 00000000..babaf2f0 --- /dev/null +++ b/Src/h264dec/ldecod/inc/meminput.h @@ -0,0 +1,26 @@ +#ifndef _MEMINPUT_H +#define _MEMINPUT_H +#pragma once + +#include "nalucommon.h" +#include <bfc/platform/types.h> + +typedef struct memory_input_struct +{ + const uint8_t *user_buffer; + size_t user_buffer_size; + size_t user_buffer_read; + + uint8_t *Buf; + int resetting; + int skip_b_frames; +} memory_input_t; + +int GetMemoryNALU (VideoParameters *p_Vid, NALU_t *nalu); +void OpenMemory(VideoParameters *p_Vid, const char *fn); +void CloseMemory(VideoParameters *p_Vid); +void malloc_mem_input(VideoParameters *p_Vid); +void free_mem_input(VideoParameters *p_Vid); + +#endif + diff --git a/Src/h264dec/ldecod/inc/nalu.h b/Src/h264dec/ldecod/inc/nalu.h new file mode 100644 index 00000000..d10e6bfa --- /dev/null +++ b/Src/h264dec/ldecod/inc/nalu.h @@ -0,0 +1,28 @@ + +/*! 
+ ************************************************************************************** + * \file + * nalu.h + * \brief + * Common NALU support functions + * + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + *************************************************************************************** + */ + + +#ifndef _NALU_H_ +#define _NALU_H_ + +#include "nalucommon.h" + +extern void initBitsFile (VideoParameters *p_Vid); +extern void CheckZeroByteNonVCL(VideoParameters *p_Vid, NALU_t *nalu); +extern void CheckZeroByteVCL (VideoParameters *p_Vid, NALU_t *nalu); + +extern int read_next_nalu(VideoParameters *p_Vid, NALU_t *nalu); + +#endif diff --git a/Src/h264dec/ldecod/inc/optim.h b/Src/h264dec/ldecod/inc/optim.h new file mode 100644 index 00000000..bfcc07f8 --- /dev/null +++ b/Src/h264dec/ldecod/inc/optim.h @@ -0,0 +1,46 @@ +#pragma once + +typedef struct optimized_functions // NOTE(review): this header includes nothing; the h264_* block types, SyntaxElement, Bitstream and uintN_t used below must be declared before it is included +{ + //void (*itrans4x4)(const h264_short_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y); + void (*itrans8x8)(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x); + + void (*weighted_mc_prediction16x16)(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); + void (*weighted_mc_prediction16x8)(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); + void (*weighted_mc_prediction8x8)(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); + + void (*weighted_bi_prediction16x16)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); + void (*weighted_bi_prediction16x8)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t 
block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); + void (*weighted_bi_prediction8x8)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); + + void (*bi_prediction8x8)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0); + + void (*copy_image_data_16x16_stride)(struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source); + int (*code_from_bitstream_2d_5_4)(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab); + int (*code_from_bitstream_2d_17_4)(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); + int (*code_from_bitstream_2d_16_1)(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); +} OptimizedFunctions; + +extern OptimizedFunctions opt; + +/* define macros for these function calls. 
this way we could do specific builds that call the functions directly, if we have the need */ +#ifdef _DEBUG +#define opt_itrans4x4 (itrans4x4_c) +#else +#define opt_itrans4x4 (itrans4x4_mmx) +#endif // opt_itrans4x4 is bound directly to a function at compile time, not to a member of opt +#define opt_itrans8x8 (opt.itrans8x8) + +#define opt_weighted_mc_prediction16x16 (opt.weighted_mc_prediction16x16) +#define opt_weighted_mc_prediction16x8 (opt.weighted_mc_prediction16x8) +#define opt_weighted_mc_prediction8x8 (opt.weighted_mc_prediction8x8) + +#define opt_weighted_bi_prediction16x16 (opt.weighted_bi_prediction16x16) +#define opt_weighted_bi_prediction16x8 (opt.weighted_bi_prediction16x8) +#define opt_weighted_bi_prediction8x8 (opt.weighted_bi_prediction8x8) + +#define opt_bi_prediction8x8 (opt.bi_prediction8x8) +#define opt_copy_image_data_16x16_stride (opt.copy_image_data_16x16_stride) +#define opt_code_from_bitstream_2d_5_4 (opt.code_from_bitstream_2d_5_4) +#define opt_code_from_bitstream_2d_17_4 (opt.code_from_bitstream_2d_17_4) +#define opt_code_from_bitstream_2d_16_1 (opt.code_from_bitstream_2d_16_1)
\ No newline at end of file diff --git a/Src/h264dec/ldecod/inc/output.h b/Src/h264dec/ldecod/inc/output.h new file mode 100644 index 00000000..69e06f85 --- /dev/null +++ b/Src/h264dec/ldecod/inc/output.h @@ -0,0 +1,27 @@ + +/*! + ************************************************************************************** + * \file + * output.h + * \brief + * Picture writing routine headers + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Suehring <suehring@hhi.de> + *************************************************************************************** + */ + +#ifndef _OUTPUT_H_ +#define _OUTPUT_H_ // NOTE(review): no #include in this header; VideoParameters, FrameStore and StorablePicture must already be declared by the includer + + +extern void write_stored_frame(VideoParameters *p_Vid, FrameStore *fs); +extern void direct_output (VideoParameters *p_Vid, StorablePicture *p); +extern void init_out_buffer (VideoParameters *p_Vid); +extern void uninit_out_buffer (VideoParameters *p_Vid); + +#if (PAIR_FIELDS_IN_OUTPUT) +extern void flush_pending_output(VideoParameters *p_Vid); +#endif + +#endif //_OUTPUT_H_ diff --git a/Src/h264dec/ldecod/inc/parset.h b/Src/h264dec/ldecod/inc/parset.h new file mode 100644 index 00000000..c433c26d --- /dev/null +++ b/Src/h264dec/ldecod/inc/parset.h @@ -0,0 +1,56 @@ + +/*! 
+ ************************************************************************************** + * \file + * parset.h + * \brief + * Picture and Sequence Parameter Sets, decoder operations + * + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + *************************************************************************************** + */ + + +#ifndef _PARSET_H_ +#define _PARSET_H_ + +#include "parsetcommon.h" +#include "nalucommon.h" + +static const byte ZZ_SCAN[16] = //!< 4x4 zig-zag scan table (a permutation of 0..15) +{ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +static const byte ZZ_SCAN8[64] = //!< 8x8 zig-zag scan table (a permutation of 0..63) +{ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 +}; + +extern void Scaling_List(int *scalingList, int sizeOfScalingList, Boolean *UseDefaultScalingMatrix, Bitstream *s); + +extern void InitVUI(seq_parameter_set_rbsp_t *sps); +extern int ReadVUI(DataPartition *p, seq_parameter_set_rbsp_t *sps); +extern int ReadHRDParameters(DataPartition *p, hrd_parameters_t *hrd); + +extern void PPSConsistencyCheck (pic_parameter_set_rbsp_t *pps); +extern void SPSConsistencyCheck (seq_parameter_set_rbsp_t *sps); + +extern void MakePPSavailable (VideoParameters *p_Vid, int id, pic_parameter_set_rbsp_t *pps); +extern void MakeSPSavailable (VideoParameters *p_Vid, int id, seq_parameter_set_rbsp_t *sps); + +extern void ProcessSPS (VideoParameters *p_Vid, NALU_t *nalu); +extern void ProcessPPS (VideoParameters *p_Vid, NALU_t *nalu); + +extern void UseParameterSet (Slice *currSlice, int PicParsetId); + +extern void CleanUpPPS(VideoParameters *p_Vid); + +extern void activate_sps (VideoParameters *p_Vid, seq_parameter_set_rbsp_t *sps); +extern void activate_pps (VideoParameters *p_Vid, pic_parameter_set_rbsp_t 
*pps); + +#endif diff --git a/Src/h264dec/ldecod/inc/parsetcommon.h b/Src/h264dec/ldecod/inc/parsetcommon.h new file mode 100644 index 00000000..16a64098 --- /dev/null +++ b/Src/h264dec/ldecod/inc/parsetcommon.h @@ -0,0 +1,202 @@ + +/*! + ************************************************************************************** + * \file + * parsetcommon.h + * \brief + * Picture and Sequence Parameter Sets, structures common to encoder and decoder + * + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + *************************************************************************************** + */ + + + +// In the JVT syntax, frequently flags are used that indicate the presence of +// certain pieces of information in the NALU. Here, these flags are also +// present. In the encoder, those bits indicate that the values signaled to +// be present are meaningful and that this part of the syntax should be +// written to the NALU. In the decoder, the flag indicates that information +// was received from the decoded NALU and should be used henceforth. 
+// The structure names were chosen as indicated in the JVT syntax + +#ifndef _PARSETCOMMON_H_ +#define _PARSETCOMMON_H_ + +#include "defines.h" + +#define MAXIMUMPARSETRBSPSIZE 1500 +#define MAXIMUMPARSETNALUSIZE 1500 + +#define MAXSPS 32 +#define MAXPPS 256 + +#define MAXIMUMVALUEOFcpb_cnt 32 +typedef struct +{ + unsigned int cpb_cnt_minus1; // ue(v) + unsigned int bit_rate_scale; // u(4) + unsigned int cpb_size_scale; // u(4) + unsigned int bit_rate_value_minus1 [MAXIMUMVALUEOFcpb_cnt]; // ue(v) + unsigned int cpb_size_value_minus1 [MAXIMUMVALUEOFcpb_cnt]; // ue(v) + unsigned int cbr_flag [MAXIMUMVALUEOFcpb_cnt]; // u(1) + unsigned int initial_cpb_removal_delay_length_minus1; // u(5) + unsigned int cpb_removal_delay_length_minus1; // u(5) + unsigned int dpb_output_delay_length_minus1; // u(5) + unsigned int time_offset_length; // u(5) +} hrd_parameters_t; + + +typedef struct +{ + Boolean aspect_ratio_info_present_flag; // u(1) + unsigned int aspect_ratio_idc; // u(8) + unsigned short sar_width; // u(16) + unsigned short sar_height; // u(16) + Boolean overscan_info_present_flag; // u(1) + Boolean overscan_appropriate_flag; // u(1) + Boolean video_signal_type_present_flag; // u(1) + unsigned int video_format; // u(3) + Boolean video_full_range_flag; // u(1) + Boolean colour_description_present_flag; // u(1) + unsigned int colour_primaries; // u(8) + unsigned int transfer_characteristics; // u(8) + unsigned int matrix_coefficients; // u(8) + Boolean chroma_location_info_present_flag; // u(1) + unsigned int chroma_sample_loc_type_top_field; // ue(v) + unsigned int chroma_sample_loc_type_bottom_field; // ue(v) + Boolean timing_info_present_flag; // u(1) + unsigned int num_units_in_tick; // u(32) + unsigned int time_scale; // u(32) + Boolean fixed_frame_rate_flag; // u(1) + Boolean nal_hrd_parameters_present_flag; // u(1) + hrd_parameters_t nal_hrd_parameters; // hrd_parameters_t + Boolean vcl_hrd_parameters_present_flag; // u(1) + hrd_parameters_t vcl_hrd_parameters; 
// hrd_parameters_t + // if( nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag ) + Boolean low_delay_hrd_flag; // u(1) + Boolean pic_struct_present_flag; // u(1) + Boolean bitstream_restriction_flag; // u(1) + Boolean motion_vectors_over_pic_boundaries_flag; // u(1) + unsigned int max_bytes_per_pic_denom; // ue(v) + unsigned int max_bits_per_mb_denom; // ue(v) + unsigned int log2_max_mv_length_vertical; // ue(v) + unsigned int log2_max_mv_length_horizontal; // ue(v) + unsigned int num_reorder_frames; // ue(v) + unsigned int max_dec_frame_buffering; // ue(v) +} vui_seq_parameters_t; + + +#define MAXnum_slice_groups_minus1 8 +typedef struct +{ + Boolean Valid; // indicates the parameter set is valid + unsigned int pic_parameter_set_id; // ue(v) + unsigned int seq_parameter_set_id; // ue(v) + Boolean entropy_coding_mode_flag; // u(1) + Boolean transform_8x8_mode_flag; // u(1) + + Boolean pic_scaling_matrix_present_flag; // u(1) + int pic_scaling_list_present_flag[12]; // u(1) + int ScalingList4x4[6][16]; // se(v) + int ScalingList8x8[6][64]; // se(v) + Boolean UseDefaultScalingMatrix4x4Flag[6]; + Boolean UseDefaultScalingMatrix8x8Flag[6]; + + // if( pic_order_cnt_type < 2 ) in the sequence parameter set + Boolean bottom_field_pic_order_in_frame_present_flag; // u(1) + unsigned int num_slice_groups_minus1; // ue(v) + unsigned int slice_group_map_type; // ue(v) + // if( slice_group_map_type = = 0 ) + unsigned int run_length_minus1[MAXnum_slice_groups_minus1]; // ue(v) + // else if( slice_group_map_type = = 2 ) + unsigned int top_left[MAXnum_slice_groups_minus1]; // ue(v) + unsigned int bottom_right[MAXnum_slice_groups_minus1]; // ue(v) + // else if( slice_group_map_type = = 3 || 4 || 5 ) + Boolean slice_group_change_direction_flag; // u(1) + unsigned int slice_group_change_rate_minus1; // ue(v) + // else if( slice_group_map_type = = 6 ) + unsigned int pic_size_in_map_units_minus1; // ue(v) + byte *slice_group_id; // complete MBAmap u(v) + + int 
num_ref_idx_l0_active_minus1; // ue(v) + int num_ref_idx_l1_active_minus1; // ue(v) + Boolean weighted_pred_flag; // u(1) + unsigned int weighted_bipred_idc; // u(2) + int pic_init_qp_minus26; // se(v) + int pic_init_qs_minus26; // se(v) + int chroma_qp_index_offset; // se(v) + + int second_chroma_qp_index_offset; // se(v) + + Boolean deblocking_filter_control_present_flag; // u(1) + Boolean constrained_intra_pred_flag; // u(1) + Boolean redundant_pic_cnt_present_flag; // u(1) +} pic_parameter_set_rbsp_t; + + +#define MAXnum_ref_frames_in_pic_order_cnt_cycle 256 +typedef struct +{ + Boolean Valid; // indicates the parameter set is valid + + unsigned int profile_idc; // u(8) + Boolean constrained_set0_flag; // u(1) + Boolean constrained_set1_flag; // u(1) + Boolean constrained_set2_flag; // u(1) + Boolean constrained_set3_flag; // u(1) + unsigned int level_idc; // u(8) + unsigned int seq_parameter_set_id; // ue(v) + unsigned int chroma_format_idc; // ue(v) + + Boolean seq_scaling_matrix_present_flag; // u(1) + int seq_scaling_list_present_flag[12]; // u(1) + int ScalingList4x4[6][16]; // se(v) + int ScalingList8x8[6][64]; // se(v) + Boolean UseDefaultScalingMatrix4x4Flag[6]; + Boolean UseDefaultScalingMatrix8x8Flag[6]; + + unsigned int bit_depth_luma_minus8; // ue(v) + unsigned int bit_depth_chroma_minus8; // ue(v) + unsigned int log2_max_frame_num_minus4; // ue(v) + unsigned int pic_order_cnt_type; + // if( pic_order_cnt_type == 0 ) + unsigned int log2_max_pic_order_cnt_lsb_minus4; // ue(v) + // else if( pic_order_cnt_type == 1 ) + Boolean delta_pic_order_always_zero_flag; // u(1) + int offset_for_non_ref_pic; // se(v) + int offset_for_top_to_bottom_field; // se(v) + unsigned int num_ref_frames_in_pic_order_cnt_cycle; // ue(v) + // for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ ) + int offset_for_ref_frame[MAXnum_ref_frames_in_pic_order_cnt_cycle]; // se(v) + unsigned int num_ref_frames; // ue(v) + Boolean gaps_in_frame_num_value_allowed_flag; // u(1) + 
unsigned int pic_width_in_mbs_minus1; // ue(v) + unsigned int pic_height_in_map_units_minus1; // ue(v) + Boolean frame_mbs_only_flag; // u(1) + // if( !frame_mbs_only_flag ) + Boolean mb_adaptive_frame_field_flag; // u(1) + Boolean direct_8x8_inference_flag; // u(1) + Boolean frame_cropping_flag; // u(1) + unsigned int frame_cropping_rect_left_offset; // ue(v) + unsigned int frame_cropping_rect_right_offset; // ue(v) + unsigned int frame_cropping_rect_top_offset; // ue(v) + unsigned int frame_cropping_rect_bottom_offset; // ue(v) + Boolean vui_parameters_present_flag; // u(1) + vui_seq_parameters_t vui_seq_parameters; // vui_seq_parameters_t + unsigned separate_colour_plane_flag; // u(1) +} seq_parameter_set_rbsp_t; + +pic_parameter_set_rbsp_t *AllocPPS (void); +seq_parameter_set_rbsp_t *AllocSPS (void); + +void FreePPS (pic_parameter_set_rbsp_t *pps); +void FreeSPS (seq_parameter_set_rbsp_t *sps); + +int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2); +int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2); + +#endif diff --git a/Src/h264dec/ldecod/inc/quant.h b/Src/h264dec/ldecod/inc/quant.h new file mode 100644 index 00000000..01bde65f --- /dev/null +++ b/Src/h264dec/ldecod/inc/quant.h @@ -0,0 +1,169 @@ + +/*! 
+ ************************************************************************ + * \file quant.h + * + * \brief + * definitions for quantization functions + * + * \author + * + ************************************************************************ + */ + +#ifndef _QUANT_H_ +#define _QUANT_H_ + +// exported variables (declared static const: every translation unit that includes this header gets its own copy) +static const int dequant_coef8[6][64] = +{ + { + 20, 19, 25, 19, 20, 19, 25, 19, + 19, 18, 24, 18, 19, 18, 24, 18, + 25, 24, 32, 24, 25, 24, 32, 24, + 19, 18, 24, 18, 19, 18, 24, 18, + 20, 19, 25, 19, 20, 19, 25, 19, + 19, 18, 24, 18, 19, 18, 24, 18, + 25, 24, 32, 24, 25, 24, 32, 24, + 19, 18, 24, 18, 19, 18, 24, 18 + }, + { + 22, 21, 28, 21, 22, 21, 28, 21, + 21, 19, 26, 19, 21, 19, 26, 19, + 28, 26, 35, 26, 28, 26, 35, 26, + 21, 19, 26, 19, 21, 19, 26, 19, + 22, 21, 28, 21, 22, 21, 28, 21, + 21, 19, 26, 19, 21, 19, 26, 19, + 28, 26, 35, 26, 28, 26, 35, 26, + 21, 19, 26, 19, 21, 19, 26, 19 + }, + { + 26, 24, 33, 24, 26, 24, 33, 24, + 24, 23, 31, 23, 24, 23, 31, 23, + 33, 31, 42, 31, 33, 31, 42, 31, + 24, 23, 31, 23, 24, 23, 31, 23, + 26, 24, 33, 24, 26, 24, 33, 24, + 24, 23, 31, 23, 24, 23, 31, 23, + 33, 31, 42, 31, 33, 31, 42, 31, + 24, 23, 31, 23, 24, 23, 31, 23 + }, + { + 28, 26, 35, 26, 28, 26, 35, 26, + 26, 25, 33, 25, 26, 25, 33, 25, + 35, 33, 45, 33, 35, 33, 45, 33, + 26, 25, 33, 25, 26, 25, 33, 25, + 28, 26, 35, 26, 28, 26, 35, 26, + 26, 25, 33, 25, 26, 25, 33, 25, + 35, 33, 45, 33, 35, 33, 45, 33, + 26, 25, 33, 25, 26, 25, 33, 25 + }, + { + 32, 30, 40, 30, 32, 30, 40, 30, + 30, 28, 38, 28, 30, 28, 38, 28, + 40, 38, 51, 38, 40, 38, 51, 38, + 30, 28, 38, 28, 30, 28, 38, 28, + 32, 30, 40, 30, 32, 30, 40, 30, + 30, 28, 38, 28, 30, 28, 38, 28, + 40, 38, 51, 38, 40, 38, 51, 38, + 30, 28, 38, 28, 30, 28, 38, 28 + }, + { + 36, 34, 46, 34, 36, 34, 46, 34, + 34, 32, 43, 32, 34, 32, 43, 32, + 46, 43, 58, 43, 46, 43, 58, 43, + 34, 32, 43, 32, 34, 32, 43, 32, + 36, 34, 46, 34, 36, 34, 46, 34, + 34, 32, 43, 32, 34, 32, 43, 32, + 46, 43, 58, 43, 46, 43, 58, 
43, + 34, 32, 43, 32, 34, 32, 43, 32 + } +}; + + +//! Dequantization coefficients +static const int dequant_coef[6][4][4] = { + { + { 10, 13, 10, 13}, + { 13, 16, 13, 16}, + { 10, 13, 10, 13}, + { 13, 16, 13, 16}}, + { + { 11, 14, 11, 14}, + { 14, 18, 14, 18}, + { 11, 14, 11, 14}, + { 14, 18, 14, 18}}, + { + { 13, 16, 13, 16}, + { 16, 20, 16, 20}, + { 13, 16, 13, 16}, + { 16, 20, 16, 20}}, + { + { 14, 18, 14, 18}, + { 18, 23, 18, 23}, + { 14, 18, 14, 18}, + { 18, 23, 18, 23}}, + { + { 16, 20, 16, 20}, + { 20, 25, 20, 25}, + { 16, 20, 16, 20}, + { 20, 25, 20, 25}}, + { + { 18, 23, 18, 23}, + { 23, 29, 23, 29}, + { 18, 23, 18, 23}, + { 23, 29, 23, 29}} +}; + +static const int quant_coef[6][4][4] = { //!< Quantization coefficients, same [6][4][4] shape as dequant_coef + { + { 13107, 8066, 13107, 8066}, + { 8066, 5243, 8066, 5243}, + { 13107, 8066, 13107, 8066}, + { 8066, 5243, 8066, 5243}}, + { + { 11916, 7490, 11916, 7490}, + { 7490, 4660, 7490, 4660}, + { 11916, 7490, 11916, 7490}, + { 7490, 4660, 7490, 4660}}, + { + { 10082, 6554, 10082, 6554}, + { 6554, 4194, 6554, 4194}, + { 10082, 6554, 10082, 6554}, + { 6554, 4194, 6554, 4194}}, + { + { 9362, 5825, 9362, 5825}, + { 5825, 3647, 5825, 3647}, + { 9362, 5825, 9362, 5825}, + { 5825, 3647, 5825, 3647}}, + { + { 8192, 5243, 8192, 5243}, + { 5243, 3355, 5243, 3355}, + { 8192, 5243, 8192, 5243}, + { 5243, 3355, 5243, 3355}}, + { + { 7282, 4559, 7282, 4559}, + { 4559, 2893, 4559, 2893}, + { 7282, 4559, 7282, 4559}, + { 4559, 2893, 4559, 2893}} +}; + +// SP decoding parameter (EQ. 
8-425) +static const int A[4][4] = { + { 16, 20, 16, 20}, + { 20, 25, 20, 25}, + { 16, 20, 16, 20}, + { 20, 25, 20, 25} +}; + +// exported functions +// quantization initialization +extern void init_qp_process (VideoParameters *p_Vid); +extern void free_qp_matrices(VideoParameters *p_Vid); + +// For Q-matrix +extern void assign_quant_params (Slice *currslice); +extern void CalculateQuant4x4Param(Slice *currslice); + + +#endif + diff --git a/Src/h264dec/ldecod/inc/sei.h b/Src/h264dec/ldecod/inc/sei.h new file mode 100644 index 00000000..943428fd --- /dev/null +++ b/Src/h264dec/ldecod/inc/sei.h @@ -0,0 +1,100 @@ + +/*! + ************************************************************************************* + * \file sei.h + * + * \brief + * Prototypes for sei.c + ************************************************************************************* + */ + +#ifndef SEI_H +#define SEI_H + +typedef enum { + SEI_BUFFERING_PERIOD = 0, + SEI_PIC_TIMING, + SEI_PAN_SCAN_RECT, + SEI_FILLER_PAYLOAD, + SEI_USER_DATA_REGISTERED_ITU_T_T35, + SEI_USER_DATA_UNREGISTERED, + SEI_RECOVERY_POINT, + SEI_DEC_REF_PIC_MARKING_REPETITION, + SEI_SPARE_PIC, + SEI_SCENE_INFO, + SEI_SUB_SEQ_INFO, + SEI_SUB_SEQ_LAYER_CHARACTERISTICS, + SEI_SUB_SEQ_CHARACTERISTICS, + SEI_FULL_FRAME_FREEZE, + SEI_FULL_FRAME_FREEZE_RELEASE, + SEI_FULL_FRAME_SNAPSHOT, + SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START, + SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END, + SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET, + SEI_FILM_GRAIN_CHARACTERISTICS, + SEI_DEBLOCKING_FILTER_DISPLAY_PREFERENCE, + SEI_STEREO_VIDEO_INFO, + SEI_POST_FILTER_HINTS, + SEI_TONE_MAPPING, + + SEI_MAX_ELEMENTS //!< sentinel: number of SEI types enumerated above +} SEI_type; + +#define MAX_FN 256 +// tone mapping information +#define MAX_CODED_BIT_DEPTH 12 +#define MAX_SEI_BIT_DEPTH 12 +#define MAX_NUM_PIVOTS (1<<MAX_CODED_BIT_DEPTH) + +#if (ENABLE_OUTPUT_TONEMAPPING) +typedef struct tone_mapping_struct_s +{ + Boolean seiHasTone_mapping; + unsigned int tone_map_repetition_period; + 
unsigned char coded_data_bit_depth; + unsigned char sei_bit_depth; + unsigned int model_id; + unsigned int count; + + imgpel lut[1<<MAX_CODED_BIT_DEPTH]; //!< look up table for mapping the coded data value to output data value (1<<MAX_CODED_BIT_DEPTH entries, i.e. MAX_NUM_PIVOTS) + + Bitstream *data; + int payloadSize; +} ToneMappingSEI; + +#endif + +void InterpretSEIMessage(byte* msg, int size, VideoParameters *p_Vid); +void interpret_spare_pic( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_subsequence_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_subsequence_layer_characteristics_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_subsequence_characteristics_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_scene_information( byte* payload, int size, VideoParameters *p_Vid ); // JVT-D099 +void interpret_user_data_registered_itu_t_t35_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_user_data_unregistered_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_pan_scan_rect_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_recovery_point_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_filler_payload_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_dec_ref_pic_marking_repetition_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_full_frame_freeze_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_full_frame_freeze_release_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_full_frame_snapshot_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_progressive_refinement_start_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_progressive_refinement_end_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_motion_constrained_slice_group_set_info( byte* payload, int size, VideoParameters *p_Vid ); 
+void interpret_reserved_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_buffering_period_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_picture_timing_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_film_grain_characteristics_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_deblocking_filter_display_preference_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_stereo_video_info_info( byte* payload, int size, VideoParameters *p_Vid ); +void interpret_post_filter_hints_info( byte* payload, int size, VideoParameters *p_Vid ); +// functions for tone mapping SEI message +void interpret_tone_mapping( byte* payload, int size, VideoParameters *p_Vid ); + +#if (ENABLE_OUTPUT_TONEMAPPING) +void tone_map(imgpel** imgX, imgpel* lut, int size_x, int size_y); +void init_tone_mapping_sei(ToneMappingSEI *seiToneMapping); +void update_tone_mapping_sei(ToneMappingSEI *seiToneMapping); +#endif +#endif diff --git a/Src/h264dec/ldecod/inc/transform8x8.h b/Src/h264dec/ldecod/inc/transform8x8.h new file mode 100644 index 00000000..88cfafee --- /dev/null +++ b/Src/h264dec/ldecod/inc/transform8x8.h @@ -0,0 +1,24 @@ +/*! + *************************************************************************** + * + * \file transform8x8.h + * + * \brief + * prototypes of 8x8 transform functions + * + * \date + * 9. 
October 2003 + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Yuri Vatis + **************************************************************************/ + +#ifndef _TRANSFORM8X8_H_ +#define _TRANSFORM8X8_H_ + +extern void itrans8x8_mmx(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x); +extern void itrans8x8_sse2(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x); +extern void itrans8x8_c(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x); +extern void itrans8x8_lossless(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x); +#endif diff --git a/Src/h264dec/ldecod/inc/vlc.h b/Src/h264dec/ldecod/inc/vlc.h new file mode 100644 index 00000000..9a75ad3a --- /dev/null +++ b/Src/h264dec/ldecod/inc/vlc.h @@ -0,0 +1,122 @@ + +/*! + ************************************************************************ + * \file vlc.h + * + * \brief + * header for (CA)VLC coding functions + * + * \author + * Karsten Suehring + * + ************************************************************************ + */ + +#ifndef _VLC_H_ +#define _VLC_H_ + +#include <bfc/platform/types.h> + +//! 
gives CBP value from codeword number, both for intra and inter +static const byte NCBP[2][48][2]= +{ + { // 0 1 2 3 4 5 6 7 8 9 10 11 + {15, 0},{ 0, 1},{ 7, 2},{11, 4},{13, 8},{14, 3},{ 3, 5},{ 5,10},{10,12},{12,15},{ 1, 7},{ 2,11}, + { 4,13},{ 8,14},{ 6, 6},{ 9, 9},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0}, + { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0}, + { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0} + }, + { + {47, 0},{31,16},{15, 1},{ 0, 2},{23, 4},{27, 8},{29,32},{30, 3},{ 7, 5},{11,10},{13,12},{14,15}, + {39,47},{43, 7},{45,11},{46,13},{16,14},{ 3, 6},{ 5, 9},{10,31},{12,35},{19,37},{21,42},{26,44}, + {28,33},{35,34},{37,36},{42,40},{44,39},{ 1,43},{ 2,45},{ 4,46},{ 8,17},{17,18},{18,20},{20,24}, + {24,19},{ 6,21},{ 9,26},{22,28},{25,23},{32,27},{33,29},{34,30},{36,22},{40,25},{38,38},{41,41} + } +}; + +//! for the linfo_levrun_inter routine +static const byte NTAB1[4][8][2] = +{ + {{1,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,1},{1,2},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{2,0},{1,3},{1,4},{1,5},{0,0},{0,0},{0,0},{0,0}}, + {{3,0},{2,1},{2,2},{1,6},{1,7},{1,8},{1,9},{4,0}}, +}; + +static const byte LEVRUN1[16]= +{ + 4,2,2,1,1,1,1,1,1,1,0,0,0,0,0,0, +}; + + +static const byte NTAB2[4][8][2] = +{ + {{1,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,1},{2,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,2},{3,0},{4,0},{5,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,3},{1,4},{2,1},{3,1},{6,0},{7,0},{8,0},{9,0}}, +}; + +//! 
for the linfo_levrun__c2x2 routine +static const byte LEVRUN3[4] = +{ + 2,1,0,0 +}; + +static const byte NTAB3[2][2][2] = +{ + {{1,0},{0,0}}, + {{2,0},{1,1}}, +}; + +extern int se_v (const char *tracestring, Bitstream *bitstream); +extern int ue_v (const char *tracestring, Bitstream *bitstream); +extern Boolean u_1 (const char *tracestring, Bitstream *bitstream); +extern int u_v (int LenInBits, const char *tracestring, Bitstream *bitstream); +extern int i_v (int LenInBits, const char *tracestring, Bitstream *bitstream); + +// CAVLC mapping +extern void linfo_ue(int len, int info, int *value1, int *dummy); +extern void linfo_se(int len, int info, int *value1, int *dummy); + +extern void linfo_cbp_intra_normal(int len,int info,int *cbp, int *dummy); +extern void linfo_cbp_inter_normal(int len,int info,int *cbp, int *dummy); +extern void linfo_cbp_intra_other(int len,int info,int *cbp, int *dummy); +extern void linfo_cbp_inter_other(int len,int info,int *cbp, int *dummy); + +extern void linfo_levrun_inter(int len,int info,int *level,int *irun); +extern void linfo_levrun_c2x2(int len,int info,int *level,int *irun); + +extern int uvlc_startcode_follows(Slice *currSlice, int dummy); + +extern int readSyntaxElement_VLC (SyntaxElement *sym, Bitstream *currStream); +extern int readSyntaxElement_UVLC(SyntaxElement *, struct datapartition *); +extern int readSyntaxElement_Intra4x4PredictionMode(SyntaxElement *sym, Bitstream *currStream); + +extern int GetVLCSymbol (const uint8_t buffer[],int totbitoffset,int *info, int bytecount); +extern int GetVLCSymbol_IntraMode (const uint8_t buffer[],int totbitoffset,int *info, int bytecount); + +extern int readSyntaxElement_FLC (Bitstream *currStream, int numbits); +extern int readSyntaxElement_NumCoeffTrailingOnes (SyntaxElement *sym, Bitstream *currStream, int vlcnum); +extern int readSyntaxElement_NumCoeffTrailingOnesChromaDC(VideoParameters *p_Vid, SyntaxElement *sym, Bitstream *currStream); +extern int readSyntaxElement_Level_VLC0 
(Bitstream *currStream); +extern int readSyntaxElement_Level_VLCN (int vlc, Bitstream *currStream); +extern int readSyntaxElement_TotalZeros (Bitstream *currStream, int vlcnum); +extern int readSyntaxElement_TotalZerosChromaDC (VideoParameters *p_Vid, Bitstream *currStream, int vlcnum); +extern int readSyntaxElement_Run (Bitstream *currStream, int vlcnum); +extern int GetBits (const uint8_t buffer[],int totbitoffset,int *info, int bitcount, int numbits); + + +extern int more_rbsp_data (const uint8_t buffer[],int totbitoffset,int bytecount); + +int code_from_bitstream_2d_17_4_c(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); +int code_from_bitstream_2d_17_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); + +int code_from_bitstream_2d_5_4_c(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab); +int code_from_bitstream_2d_5_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab); + +int code_from_bitstream_2d_16_1_c(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); +int code_from_bitstream_2d_16_1_sse2(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); + +#endif + diff --git a/Src/h264dec/ldecod/src/biari.asm b/Src/h264dec/ldecod/src/biari.asm new file mode 100644 index 00000000..ca5f3d6d --- /dev/null +++ b/Src/h264dec/ldecod/src/biari.asm @@ -0,0 +1,2540 @@ +.686 +.XMM +.model FLAT + +; Slice +tex_ctx@Slice = 100 +coeff@Slice = 15632 +coeff_ctr@Slice = 15760 +pos@Slice = 15764 +last_dquant@Slice = 88 +mot_ctx@Slice = 96 +slice_type@Slice = 64 + +; VideoParameters +structure@VideoParameters = 697200 +dec_picture@VideoParameters = 698192 +bitdepth_chroma_qp_scale@VideoParameters = 697456 + +; Macroblock 
+p_Slice@Macroblock = 0 +p_Vid@Macroblock = 4 +qp@macroblock = 60 +qp_scaled@Macroblock = 72 +mb_field@Macroblock = 344 +read_and_store_CBP_block_bit@Macroblock = 400 + +; StorablePicture +structure@StorablePicture = 0 +chroma_qp_offset@StorablePicture = 158688 + +; TextureInfoContexts +map_contexts@TextureInfoContexts = 436 +last_contexts@TextureInfoContexts = 3252 +one_contexts@TextureInfoContexts = 6068 +abs_contexts@TextureInfoContexts = 6508 + +_DATA SEGMENT +_pos2ctx_map DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map8x8 + DD FLAT:_pos2ctx_map8x4 + DD FLAT:_pos2ctx_map8x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map2x4c + DD FLAT:_pos2ctx_map4x4c + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map8x8 + DD FLAT:_pos2ctx_map8x4 + DD FLAT:_pos2ctx_map8x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map8x8 + DD FLAT:_pos2ctx_map8x4 + DD FLAT:_pos2ctx_map8x4 + DD FLAT:_pos2ctx_map4x4 +_pos2ctx_map_int DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map8x8i + DD FLAT:_pos2ctx_map8x4i + DD FLAT:_pos2ctx_map4x8i + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map2x4c + DD FLAT:_pos2ctx_map4x4c + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map8x8i + DD FLAT:_pos2ctx_map8x4i + DD FLAT:_pos2ctx_map8x4i + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map4x4 + DD FLAT:_pos2ctx_map8x8i + DD FLAT:_pos2ctx_map8x4i + DD FLAT:_pos2ctx_map8x4i + DD FLAT:_pos2ctx_map4x4 +_pos2ctx_last DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last8x8 + DD FLAT:_pos2ctx_last8x4 + DD FLAT:_pos2ctx_last8x4 + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last2x4c + DD FLAT:_pos2ctx_last4x4c + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last4x4 + DD 
FLAT:_pos2ctx_last8x8 + DD FLAT:_pos2ctx_last8x4 + DD FLAT:_pos2ctx_last8x4 + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last4x4 + DD FLAT:_pos2ctx_last8x8 + DD FLAT:_pos2ctx_last8x4 + DD FLAT:_pos2ctx_last8x4 + DD FLAT:_pos2ctx_last4x4 +_DATA ENDS + +CONST SEGMENT +_rLPS_table_64x4 DB 080H + DB 080H + DB 080H + DB 07bH + DB 074H + DB 06fH + DB 069H + DB 064H + DB 05fH + DB 05aH + DB 055H + DB 051H + DB 04dH + DB 049H + DB 045H + DB 042H + DB 03eH + DB 03bH + DB 038H + DB 035H + DB 033H + DB 030H + DB 02eH + DB 02bH + DB 029H + DB 027H + DB 025H + DB 023H + DB 021H + DB 020H + DB 01eH + DB 01dH + DB 01bH + DB 01aH + DB 018H + DB 017H + DB 016H + DB 015H + DB 014H + DB 013H + DB 012H + DB 011H + DB 010H + DB 0fH + DB 0eH + DB 0eH + DB 0dH + DB 0cH + DB 0cH + DB 0bH + DB 0bH + DB 0aH + DB 0aH + DB 09H + DB 09H + DB 08H + DB 08H + DB 07H + DB 07H + DB 07H + DB 06H + DB 06H + DB 06H + DB 02H + DB 0b0H + DB 0a7H + DB 09eH + DB 096H + DB 08eH + DB 087H + DB 080H + DB 07aH + DB 074H + DB 06eH + DB 068H + DB 063H + DB 05eH + DB 059H + DB 055H + DB 050H + DB 04cH + DB 048H + DB 045H + DB 041H + DB 03eH + DB 03bH + DB 038H + DB 035H + DB 032H + DB 030H + DB 02dH + DB 02bH + DB 029H + DB 027H + DB 025H + DB 023H + DB 021H + DB 01fH + DB 01eH + DB 01cH + DB 01bH + DB 01aH + DB 018H + DB 017H + DB 016H + DB 015H + DB 014H + DB 013H + DB 012H + DB 011H + DB 010H + DB 0fH + DB 0eH + DB 0eH + DB 0dH + DB 0cH + DB 0cH + DB 0bH + DB 0bH + DB 0aH + DB 09H + DB 09H + DB 09H + DB 08H + DB 08H + DB 07H + DB 07H + DB 02H + DB 0d0H + DB 0c5H + DB 0bbH + DB 0b2H + DB 0a9H + DB 0a0H + DB 098H + DB 090H + DB 089H + DB 082H + DB 07bH + DB 075H + DB 06fH + DB 069H + DB 064H + DB 05fH + DB 05aH + DB 056H + DB 051H + DB 04dH + DB 049H + DB 045H + DB 042H + DB 03fH + DB 03bH + DB 038H + DB 036H + DB 033H + DB 030H + DB 02eH + DB 02bH + DB 029H + DB 027H + DB 025H + DB 023H + DB 021H + DB 020H + DB 01eH + DB 01dH + DB 01bH + DB 01aH + DB 019H + DB 017H + DB 016H + DB 
015H + DB 014H + DB 013H + DB 012H + DB 011H + DB 010H + DB 0fH + DB 0fH + DB 0eH + DB 0dH + DB 0cH + DB 0cH + DB 0bH + DB 0bH + DB 0aH + DB 0aH + DB 09H + DB 09H + DB 08H + DB 02H + DB 0f0H + DB 0e3H + DB 0d8H + DB 0cdH + DB 0c3H + DB 0b9H + DB 0afH + DB 0a6H + DB 09eH + DB 096H + DB 08eH + DB 087H + DB 080H + DB 07aH + DB 074H + DB 06eH + DB 068H + DB 063H + DB 05eH + DB 059H + DB 055H + DB 050H + DB 04cH + DB 048H + DB 045H + DB 041H + DB 03eH + DB 03bH + DB 038H + DB 035H + DB 032H + DB 030H + DB 02dH + DB 02bH + DB 029H + DB 027H + DB 025H + DB 023H + DB 021H + DB 01fH + DB 01eH + DB 01cH + DB 01bH + DB 019H + DB 018H + DB 017H + DB 016H + DB 015H + DB 014H + DB 013H + DB 012H + DB 011H + DB 010H + DB 0fH + DB 0eH + DB 0eH + DB 0dH + DB 0cH + DB 0cH + DB 0bH + DB 0bH + DB 0aH + DB 09H + DB 02H +_AC_next_state_MPS_64 DB 01H + DB 02H + DB 03H + DB 04H + DB 05H + DB 06H + DB 07H + DB 08H + DB 09H + DB 0aH + DB 0bH + DB 0cH + DB 0dH + DB 0eH + DB 0fH + DB 010H + DB 011H + DB 012H + DB 013H + DB 014H + DB 015H + DB 016H + DB 017H + DB 018H + DB 019H + DB 01aH + DB 01bH + DB 01cH + DB 01dH + DB 01eH + DB 01fH + DB 020H + DB 021H + DB 022H + DB 023H + DB 024H + DB 025H + DB 026H + DB 027H + DB 028H + DB 029H + DB 02aH + DB 02bH + DB 02cH + DB 02dH + DB 02eH + DB 02fH + DB 030H + DB 031H + DB 032H + DB 033H + DB 034H + DB 035H + DB 036H + DB 037H + DB 038H + DB 039H + DB 03aH + DB 03bH + DB 03cH + DB 03dH + DB 03eH + DB 03eH + DB 03fH +_AC_next_state_LPS_64 DB 00H + DB 00H + DB 01H + DB 02H + DB 02H + DB 04H + DB 04H + DB 05H + DB 06H + DB 07H + DB 08H + DB 09H + DB 09H + DB 0bH + DB 0bH + DB 0cH + DB 0dH + DB 0dH + DB 0fH + DB 0fH + DB 010H + DB 010H + DB 012H + DB 012H + DB 013H + DB 013H + DB 015H + DB 015H + DB 016H + DB 016H + DB 017H + DB 018H + DB 018H + DB 019H + DB 01aH + DB 01aH + DB 01bH + DB 01bH + DB 01cH + DB 01dH + DB 01dH + DB 01eH + DB 01eH + DB 01eH + DB 01fH + DB 020H + DB 020H + DB 021H + DB 021H + DB 021H + DB 022H + DB 022H + DB 023H + DB 023H + 
DB 023H + DB 024H + DB 024H + DB 024H + DB 025H + DB 025H + DB 025H + DB 026H + DB 026H + DB 03fH +_renorm_table_32 DB 06H + DB 05H + DB 04H + DB 04H + DB 03H + DB 03H + DB 03H + DB 03H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + _renorm_table_256 DB 06H + DB 06H + DB 06H + DB 06H + DB 06H + DB 06H + DB 06H + DB 06H + DB 05H + DB 05H + DB 05H + DB 05H + DB 05H + DB 05H + DB 05H + DB 05H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 
01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H +_maxpos DB 0fH + DB 0eH + DB 03fH + DB 01fH + DB 01fH + DB 0fH + DB 03H + DB 0eH + DB 07H + DB 0fH + DB 0fH + DB 0eH + DB 03fH + DB 01fH + DB 01fH + DB 0fH + DB 0fH + DB 0eH + DB 03fH + DB 01fH + DB 01fH + DB 0fH + ORG $+2 +_c1isdc DB 01H + DB 00H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 00H + DB 01H + DB 01H + DB 01H + DB 00H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 00H + DB 01H + DB 01H + DB 01H + DB 01H + ORG $+2 +_type2ctx_bcbp DB 00H + DB 01H + DB 02H + DB 03H + DB 03H + DB 04H + DB 05H + DB 06H + DB 05H + DB 05H + DB 0aH + DB 0bH + DB 0cH + DB 0dH + DB 0dH + DB 0eH + DB 010H + DB 011H + DB 012H + DB 013H + DB 013H + DB 014H + ORG $+2 +_type2ctx_map DW 00H +DW 010H +DW 020H +DW 030H +DW 040H +DW 050H +DW 060H +DW 070H +DW 060H +DW 060H +DW 0A0H +DW 0B0H +DW 0C0H +DW 0D0H +DW 0E0H +DW 0F0H +DW 0100H +DW 0110H +DW 0120H +DW 0130H +DW 0140H +DW 0150H + ORG $+2 +_type2ctx_last DW 00H +DW 010H +DW 020H +DW 030H +DW 040H +DW 050H +DW 060H +DW 070H +DW 060H +DW 060H +DW 0A0H +DW 0B0H +DW 0C0H +DW 0D0H +DW 0E0H +DW 0F0H +DW 0100H +DW 0110H +DW 0120H +DW 0130H +DW 0140H +DW 0150H + ORG $+2 +_type2ctx_one DB 00H + DB 01H + DB 02H + DB 03H + DB 03H + DB 04H + DB 05H + DB 06H + DB 05H + DB 05H + DB 0aH + DB 0bH + DB 0cH + DB 0dH + DB 0dH + DB 
0eH + DB 010H + DB 011H + DB 012H + DB 013H + DB 013H + DB 014H + ORG $+2 +_type2ctx_abs DB 00H + DB 01H + DB 02H + DB 03H + DB 03H + DB 04H + DB 05H + DB 06H + DB 05H + DB 05H + DB 0aH + DB 0bH + DB 0cH + DB 0dH + DB 0dH + DB 0eH + DB 010H + DB 011H + DB 012H + DB 013H + DB 013H + DB 014H + ORG $+2 +plus_one_clip4 DD 1,2,3,4,4 +plus_one_clip3 DD 1,2,3,3 +_max_c2 DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip3 + DD plus_one_clip4 + DD plus_one_clip3 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + DD plus_one_clip4 + ORG $+6 +_pos2ctx_map8x8 DB 00H + DB 01H + DB 02H + DB 03H + DB 04H + DB 05H + DB 05H + DB 04H + DB 04H + DB 03H + DB 03H + DB 04H + DB 04H + DB 04H + DB 05H + DB 05H + DB 04H + DB 04H + DB 04H + DB 04H + DB 03H + DB 03H + DB 06H + DB 07H + DB 07H + DB 07H + DB 08H + DB 09H + DB 0aH + DB 09H + DB 08H + DB 07H + DB 07H + DB 06H + DB 0bH + DB 0cH + DB 0dH + DB 0bH + DB 06H + DB 07H + DB 08H + DB 09H + DB 0eH + DB 0aH + DB 09H + DB 08H + DB 06H + DB 0bH + DB 0cH + DB 0dH + DB 0bH + DB 06H + DB 09H + DB 0eH + DB 0aH + DB 09H + DB 0bH + DB 0cH + DB 0dH + DB 0bH + DB 0eH + DB 0aH + DB 0cH + DB 0eH +_pos2ctx_map8x4 DB 00H + DB 01H + DB 02H + DB 03H + DB 04H + DB 05H + DB 07H + DB 08H + DB 09H + DB 0aH + DB 0bH + DB 09H + DB 08H + DB 06H + DB 07H + DB 08H + DB 09H + DB 0aH + DB 0bH + DB 09H + DB 08H + DB 06H + DB 0cH + DB 08H + DB 09H + DB 0aH + DB 0bH + DB 09H + DB 0dH + DB 0dH + DB 0eH + DB 0eH +_pos2ctx_map4x4 DB 00H + DB 01H + DB 02H + DB 03H + DB 04H + DB 05H + DB 06H + DB 07H + DB 08H + DB 09H + DB 0aH + DB 0bH + DB 0cH + DB 0dH + DB 0eH + DB 0eH +_pos2ctx_map2x4c DB 00H + DB 00H + DB 01H + DB 01H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H 
+ DB 02H + DB 02H + DB 02H +_pos2ctx_map4x4c DB 00H + DB 00H + DB 00H + DB 00H + DB 01H + DB 01H + DB 01H + DB 01H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H +_pos2ctx_map8x8i DB 00H + DB 01H + DB 01H + DB 02H + DB 02H + DB 03H + DB 03H + DB 04H + DB 05H + DB 06H + DB 07H + DB 07H + DB 07H + DB 08H + DB 04H + DB 05H + DB 06H + DB 09H + DB 0aH + DB 0aH + DB 08H + DB 0bH + DB 0cH + DB 0bH + DB 09H + DB 09H + DB 0aH + DB 0aH + DB 08H + DB 0bH + DB 0cH + DB 0bH + DB 09H + DB 09H + DB 0aH + DB 0aH + DB 08H + DB 0bH + DB 0cH + DB 0bH + DB 09H + DB 09H + DB 0aH + DB 0aH + DB 08H + DB 0dH + DB 0dH + DB 09H + DB 09H + DB 0aH + DB 0aH + DB 08H + DB 0dH + DB 0dH + DB 09H + DB 09H + DB 0aH + DB 0aH + DB 0eH + DB 0eH + DB 0eH + DB 0eH + DB 0eH + DB 0eH +_pos2ctx_map8x4i DB 00H + DB 01H + DB 02H + DB 03H + DB 04H + DB 05H + DB 06H + DB 03H + DB 04H + DB 05H + DB 06H + DB 03H + DB 04H + DB 07H + DB 06H + DB 08H + DB 09H + DB 07H + DB 06H + DB 08H + DB 09H + DB 0aH + DB 0bH + DB 0cH + DB 0cH + DB 0aH + DB 0bH + DB 0dH + DB 0dH + DB 0eH + DB 0eH + DB 0eH +_pos2ctx_map4x8i DB 00H + DB 01H + DB 01H + DB 01H + DB 02H + DB 03H + DB 03H + DB 04H + DB 04H + DB 04H + DB 05H + DB 06H + DB 02H + DB 07H + DB 07H + DB 08H + DB 08H + DB 08H + DB 05H + DB 06H + DB 09H + DB 0aH + DB 0aH + DB 0bH + DB 0bH + DB 0bH + DB 0cH + DB 0dH + DB 0dH + DB 0eH + DB 0eH + DB 0eH +_pos2ctx_last8x8 DB 00H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 03H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 04H + DB 05H + DB 05H + DB 05H + DB 05H + DB 06H + DB 06H + DB 06H + DB 06H + DB 07H + DB 07H + DB 07H + DB 07H + DB 08H + DB 08H + DB 08H + DB 08H +_pos2ctx_last8x4 DB 
00H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 01H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 03H + DB 03H + DB 03H + DB 03H + DB 04H + DB 04H + DB 04H + DB 04H + DB 05H + DB 05H + DB 06H + DB 06H + DB 07H + DB 07H + DB 08H + DB 08H +_pos2ctx_last4x4 DB 00H + DB 01H + DB 02H + DB 03H + DB 04H + DB 05H + DB 06H + DB 07H + DB 08H + DB 09H + DB 0aH + DB 0bH + DB 0cH + DB 0dH + DB 0eH + DB 0fH +_pos2ctx_last2x4c DB 00H + DB 00H + DB 01H + DB 01H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H + DB 02H +_pos2ctx_last4x4c DB 00, 00, 00, 00, 01, 01, 01, 01, 02, 02, 02, 02, 02, 02, 02, 02 +plus_one_clip0_4 DD 0,2,3,4,4 + +align 16 +_QP_SCALE_CR DD 00H + DD 01H + DD 02H + DD 03H + DD 04H + DD 05H + DD 06H + DD 07H + DD 08H + DD 09H + DD 0aH + DD 0bH + DD 0cH + DD 0dH + DD 0eH + DD 0fH + DD 010H + DD 011H + DD 012H + DD 013H + DD 014H + DD 015H + DD 016H + DD 017H + DD 018H + DD 019H + DD 01aH + DD 01bH + DD 01cH + DD 01dH + DD 01dH + DD 01eH + DD 01fH + DD 020H + DD 020H + DD 021H + DD 022H + DD 022H + DD 023H + DD 023H + DD 024H + DD 024H + DD 025H + DD 025H + DD 025H + DD 026H + DD 026H + DD 026H + DD 027H + DD 027H + DD 027H + DD 027H + align 16 +_51 DD 51 +CONST ENDS + + +PUBLIC _biari_decode_symbol +_TEXT SEGMENT +dep = 4 ; size = 4 (stack offset of the first argument at entry) +bi_ct = 8 ; size = 4 (stack offset of the second argument at entry) +_biari_decode_symbol PROC ; decodes one bin using context bi_ct; the bit is returned in eax; ebx, ebp and edi are saved and restored + STACKOFFSET=0 + mov edx, DWORD PTR dep[esp+STACKOFFSET] ; edx = dep + STACKOFFSET=STACKOFFSET+4 + push ebx + mov ebx, DWORD PTR bi_ct[esp+STACKOFFSET] ; ebx = bi_ct + movzx eax, WORD PTR [ebx] ; eax = state (16-bit word at bi_ct offset 0) + push ebp + push edi + STACKOFFSET = STACKOFFSET+8 + + mov edi, DWORD PTR [edx] ; edi = range + mov ecx, edi ; ecx = range + and ecx, 0C0H ; ecx = range & 0C0H, i.e. ((range >> 6) & 3) * 64: byte offset of one 64-entry row of the table + movzx ebp, BYTE PTR _rLPS_table_64x4[ecx+eax] ; ebp = rLPS + + ; register state: + ; eax: state (bi_ct->state) + ; ebx: bi_ct + ; edx: dep + ; edi: range + ; ebp: rLPS + + mov ecx, DWORD PTR [edx+8] ; ecx = bitsleft + sub edi, ebp ; 
range -= rLPS + shl edi, cl ; range << bitsleft + cmp DWORD PTR [edx+4], edi ; value < (range << bitsleft) + jge SHORT CABAC@LPS ; not less: take the LPS path + + movzx ax, BYTE PTR _AC_next_state_MPS_64[eax] ; eax = state = AC_next_state_MPS_64[state] + shr edi, cl ; undo earlier shift + mov WORD PTR [ebx], ax ; bi_ct->state = state (the word at offset 0 that was loaded as state) + cmp edi, 256 ; 00000100H + setb cl ; cl = 1 if range < 256, else 0 + + ; register state + ; eax: state + ; ebx: bi_ct + ; ecx: cl = renorm shift (0 or 1); upper bytes left over from bitsleft + ; edx: dep + ; edi: range + ; ebp: rLPS + + shl edi, cl + sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= renorm shift. NOTE(review): all of ecx is subtracted, so this relies on bitsleft < 256; confirm + mov DWORD PTR [edx], edi ; dep->Drange = range + movzx eax, BYTE PTR [ebx+2] ; return bit (byte at bi_ct offset 2) + jz SHORT READ_TWO_BYTES; if (dep->DbitsLeft==0); flags are still those of the sub above + + pop edi + pop ebp + pop ebx + ret 0 +align 16 +CABAC@LPS: + sub DWORD PTR [edx+4], edi ; value -= (range - rLPS) << bitsleft + movzx cx, BYTE PTR _AC_next_state_LPS_64[eax] ; cx: state = AC_next_state_LPS_64[state] + mov WORD PTR [ebx], cx ; store new state back to bi_ct->state (word at offset 0) + + ; register state: + ; eax: state (old) + ; ebx: bi_ct + ; ecx: state (new) + ; edx: dep + ; edi: (range - rLPS) << bitsleft, no longer needed + ; ebp: rLPS + + mov edi, ebx ; edi = bi_ct, which frees bl for the flag below + test eax, eax ; if state(old) == 0 + movzx ecx, BYTE PTR _renorm_table_256[ebp] ; ecx = renorm_table_256[rLPS] + sete bl ; bl = 1 [ if state(old) == 0 ] + movzx eax, BYTE PTR [edi+2] + xor eax, 1 ; eax = return bit: inverse of the byte at bi_ct offset 2 + xor BYTE PTR [edi+2], bl ; flip that stored byte when state(old) == 0 + + ; register state: + ; eax: return bit + ; ebx: bl = (state(old) == 0) + ; ecx: renorm + ; edx: dep + ; edi: bi_ct + ; ebp: rLPS + + shl ebp, cl ; ebp = range = rLPS <<= renorm + sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= renorm; + mov DWORD PTR [edx], ebp ; dep->Drange = range; + jle SHORT READ_TWO_BYTES ; if( dep->DbitsLeft <= 0 ) + + ; register state: + ; eax: return bit + ; ebx: bl = (state(old) == 0) + ; ecx: renorm + ; edx: dep + ; edi: bi_ct + ; ebp: range = rLPS <<= renorm + + pop edi + pop ebp + pop ebx + ret 0 +align 16 +READ_TWO_BYTES: + + ; register state: + ; eax: return bit (already final) + ; ebx: reused as scratch below, restored by the pops + ; ecx: reused as scratch below + ; edx: dep + ; edi: reused as scratch below, restored by the pops + + mov ebx, DWORD PTR [edx+16] ; ebx = dep->Dcodestrm_len + mov ecx, DWORD PTR [ebx] ; ecx = 
*dep->Dcodestrm_len + lea edi, DWORD PTR [ecx+2] ; edi = *dep->Dcodestrm_len + 2 + mov DWORD PTR [ebx], edi ; *dep->Dcodestrm_len += 2 + mov ebx, DWORD PTR [edx+12] ; ebx = dep->Dcodestrm + movzx ecx, WORD PTR [ebx+ecx] ; ecx = the two bytes at the old stream position + xchg cl, ch ; swap them so the first byte in memory becomes the high byte + shl DWORD PTR [edx+4], 16 + mov WORD PTR [edx+4], cx ; value = (value << 16) | those 16 bits + + add DWORD PTR [edx+8], 16 ; dep->DbitsLeft += 16 + ;mov eax, DWORD PTR _bit$[esp+STACKOFFSET] ; eax = bit = return value + pop edi + pop ebp + pop ebx + ret 0 +_biari_decode_symbol ENDP +_TEXT ENDS + +; +; +; a version of biari_decode_symbol slightly optimized: arguments arrive in registers, none on the stack +; pass dep in edx and ctx in eax. edx retains dep on exit; the decoded bit is returned in eax + +_TEXT SEGMENT +_biari_decode_symbol_map PROC NEAR + STACKOFFSET=0 + push ebx + STACKOFFSET=4 + movzx ebx, WORD PTR [eax] ; ebx = state (16-bit word at bi_ct offset 0) + push ebp + push edi + STACKOFFSET = 12 + + mov edi, DWORD PTR [edx] ; edi = range + mov ecx, edi ; ecx = range + and ecx, 0C0H ; ecx = range & 0C0H, i.e. ((range >> 6) & 3) * 64: byte offset of one 64-entry row of the table + movzx ebp, BYTE PTR _rLPS_table_64x4[ecx+ebx] ; ebp = rLPS + + ; register state: + ; ebx: state (bi_ct->state) + ; eax: bi_ct + ; edx: dep + ; edi: range + ; ebp: rLPS + + mov ecx, DWORD PTR [edx+8] ; ecx = bitsleft + sub edi, ebp ; range -= rLPS + shl edi, cl ; range << bitsleft + cmp DWORD PTR [edx+4], edi ; value < (range << bitsleft) + jge SHORT CABAC_OPT@LPS ; not less: take the LPS path +; MPS + movzx bx, BYTE PTR _AC_next_state_MPS_64[ebx] ; ebx = state = AC_next_state_MPS_64[state] + shr edi, cl ; undo earlier shift + mov WORD PTR [eax], bx ; bi_ct->state = state (the word at offset 0 that was loaded as state) + cmp edi, 256 ; 00000100H + setb cl ; cl = 1 if range < 256, else 0 + + ; register state + ; ebx: state + ; eax: bi_ct + ; ecx: cl = renorm shift (0 or 1); upper bytes left over from bitsleft + ; edx: dep + ; edi: range + ; ebp: rLPS + + shl edi, cl + sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= renorm shift. NOTE(review): all of ecx is subtracted, so this relies on bitsleft < 256; confirm + mov DWORD PTR [edx], edi ; dep->Drange = range + movzx eax, BYTE PTR [eax+2] ; return bit (byte at bi_ct offset 2) + jz SHORT READ_TWO_BYTES ; if (dep->DbitsLeft==0); flags are still those of the sub above + + ; register state + ; ebx: state + ; eax: return bit + ; ecx: renorm shift (0 or 1) + ; edx: dep + ; edi: range + ; ebp: rLPS + + pop edi + pop ebp + pop ebx + ret 0 +align 16 +CABAC_OPT@LPS: + sub DWORD PTR 
[edx+4], edi + movzx cx, BYTE PTR _AC_next_state_LPS_64[ebx] ; cx: state = AC_next_state_LPS_64[state] + mov WORD PTR [eax], cx ; store new state back to bi_ct->state (word at offset 0) + + ; register state: + ; ebx: state (old) + ; eax: bi_ct + ; ecx: state (new) + ; edx: dep + ; edi: (range - rLPS) << bitsleft, no longer needed + ; ebp: rLPS + + mov edi, eax ; edi = bi_ct, because eax is about to become the return value + movzx eax, BYTE PTR [eax+2] + xor eax, 1 ; eax = return bit: inverse of the byte at bi_ct offset 2 + test ebx, ebx ; if state(old) == 0 + movzx ecx, BYTE PTR _renorm_table_256[ebp] ; ecx = renorm_table_256[rLPS] + sete bl ; bl = 1 [ if state(old) == 0 ] + xor BYTE PTR [edi+2], bl ; flip the stored byte at bi_ct offset 2 when state(old) == 0 + + ; register state: + ; ebx: bl = (state(old) == 0) + ; eax: return bit + ; ecx: renorm + ; edx: dep + ; edi: bi_ct + ; ebp: rLPS + + shl ebp, cl ; ebp = range = rLPS <<= renorm + sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= renorm; + mov DWORD PTR [edx], ebp ; dep->Drange = range; + jle SHORT READ_TWO_BYTES ; if( dep->DbitsLeft <= 0 ) + + ; register state: + ; ebx: bl = (state(old) == 0) + ; eax: return bit + ; ecx: renorm + ; edx: dep + ; edi: bi_ct + ; ebp: range = rLPS <<= renorm + + pop edi + pop ebp + pop ebx + ret 0 +align 16 +READ_TWO_BYTES: + + ; register state: + ; ebx: reused as scratch below, restored by the pops + ; eax: return bit (already final) + ; ecx: reused as scratch below + ; edx: dep + ; edi: reused as scratch below, restored by the pops + + mov ebx, DWORD PTR [edx+16] ; ebx = dep->Dcodestrm_len + mov ecx, DWORD PTR [ebx] ; ecx = *dep->Dcodestrm_len + lea edi, DWORD PTR [ecx+2] ; edi = *dep->Dcodestrm_len + 2 + mov DWORD PTR [ebx], edi ; *dep->Dcodestrm_len += 2 + mov ebx, DWORD PTR [edx+12] ; ebx = dep->Dcodestrm + movzx ecx, WORD PTR [ebx+ecx] ; ecx = the two bytes at the old stream position + xchg cl, ch ; swap them so the first byte in memory becomes the high byte + shl DWORD PTR [edx+4], 16 + mov WORD PTR [edx+4], cx ; value = (value << 16) | those 16 bits + + add DWORD PTR [edx+8], 16 ; dep->DbitsLeft += 16 + pop edi + pop ebp + pop ebx + ret 0 + +_biari_decode_symbol_map ENDP +_TEXT ENDS + + +; ebx, ebp and edi are NOT preserved +; pass tex_ctx in ebp +; pass type in ebx (NOTE(review): the body zeroes ebx before ever reading it and uses the TypeCtxOne/TypeCtxAbs macro arguments instead) +; pass dep in edx +; pass coeff in edi; esi carries the loop index i and is popped before returning +SigCoefFunction MACRO MaxC2, TypeCtxOne, TypeCtxAbs, MaxPos ; MaxPos is not referenced anywhere in the body +_abs_contexts$ = 28 ; local variable (safe because of how the function is called) +_one_contexts$ = 32 ; local variable (safe 
because of how the function is called) +STACKOFFSET=0 + lea eax, DWORD PTR [ebp+TypeCtxOne*20+6068] ; 6068 = offsetof(tex_ctx, one_contexts), the same value as one_contexts@TextureInfoContexts; 20 bytes per TypeCtxOne + mov DWORD PTR _one_contexts$[esp+STACKOFFSET], eax ; one_contexts = tex_ctx->one_contexts[type2ctx_one[type]]; + ;push esi + STACKOFFSET=STACKOFFSET+0 + + ;esi: i (loop variable) = maxpos[type] + lea ecx, DWORD PTR [ebp+TypeCtxAbs*20+6508] ; 6508 is the same value as abs_contexts@TextureInfoContexts; 20 bytes per TypeCtxAbs + mov ebp, 1 ; ebp: c1 + xor ebx, ebx ; ebx: c2 + mov DWORD PTR _abs_contexts$[esp+STACKOFFSET], ecx ; abs_contexts = tex_ctx->abs_contexts[type2ctx_abs[type]]; +SIGN_COEFF@LOOP_AGAIN: + cmp WORD PTR [edi+esi*2], 0 ;if (coeff[i]!=0) + je SHORT SIGN_COEFF@LOOP_ITR + mov ecx, DWORD PTR _one_contexts$[esp+STACKOFFSET] + lea eax, DWORD PTR [ecx+ebp*4] ; eax = context number c1 within one_contexts (4 bytes per context) + mov ebp, DWORD PTR plus_one_clip0_4[ebp*4] ; c1 = plus_one_clip0_4[c1]; + call _biari_decode_symbol_map ; biari_decode_symbol (dep_dp, one_contexts + c1); + test eax, eax + jz SHORT SIGN_COEFF@DECODE_EQ_PROB + ;add WORD PTR [edi+esi*2], ax ; coeff[i] += + mov ecx, DWORD PTR _abs_contexts$[esp+STACKOFFSET] + lea eax, DWORD PTR [ecx+ebx*4] ; eax = context number c2 within abs_contexts (4 bytes per context) + call _unary_exp_golomb_level_decode ;unary_exp_golomb_level_decode (dep_dp, abs_contexts + c2); + inc eax + add WORD PTR [edi+esi*2], ax ; coeff[i] += return val, including the 1 just added by inc + xor ebp, ebp ; c1 = 0 + mov ebx, DWORD PTR MaxC2[ebx*4] ; c2 = MaxC2[c2] +SIGN_COEFF@DECODE_EQ_PROB: + call _biari_decode_symbol_eq_prob_asm ; biari_decode_symbol_eq_prob(dep_dp) + js SHORT SIGN_COEFF@LOOP_ITR ; NOTE(review): tests the sign flag as left by the call above; that routine is not shown here, confirm it sets the flag + neg WORD PTR [edi+esi*2] ; coeff[i] = -coeff[i] +SIGN_COEFF@LOOP_ITR: + sub esi, 1 ; i-- + jns SHORT SIGN_COEFF@LOOP_AGAIN ; keep looping while i >= 0 + pop esi ; NOTE(review): no matching push in this macro (the one above is commented out); presumably the caller pushed esi, confirm + ret 0 +ENDM + +_TEXT SEGMENT +_read_significant_coefficients0 PROC +SigCoefFunction plus_one_clip4, 0, 0, 15 +_read_significant_coefficients0 ENDP +_read_significant_coefficients1 PROC +SigCoefFunction plus_one_clip4, 1, 1, 14 +_read_significant_coefficients1 ENDP +_read_significant_coefficients2 PROC +SigCoefFunction plus_one_clip4, 2, 2, 63 +_read_significant_coefficients2 ENDP +_read_significant_coefficients3 PROC +SigCoefFunction plus_one_clip4, 3, 
3, 31 +_read_significant_coefficients3 ENDP +_read_significant_coefficients4 PROC +SigCoefFunction plus_one_clip4, 3, 3, 31 +_read_significant_coefficients4 ENDP +_read_significant_coefficients5 PROC +SigCoefFunction plus_one_clip4, 4, 4, 15 +_read_significant_coefficients5 ENDP +_read_significant_coefficients6 PROC +SigCoefFunction plus_one_clip3, 5, 5, 3 +_read_significant_coefficients6 ENDP +_read_significant_coefficients7 PROC +SigCoefFunction plus_one_clip4, 6, 6, 14 +_read_significant_coefficients7 ENDP +_read_significant_coefficients8 PROC +SigCoefFunction plus_one_clip3, 5, 5, 7 +_read_significant_coefficients8 ENDP +_read_significant_coefficients9 PROC +SigCoefFunction plus_one_clip4, 5, 5, 15 +_read_significant_coefficients9 ENDP +_read_significant_coefficients10 PROC +SigCoefFunction plus_one_clip4, 10, 10, 15 +_read_significant_coefficients10 ENDP +_read_significant_coefficients11 PROC +SigCoefFunction plus_one_clip4, 11, 11, 14 +_read_significant_coefficients11 ENDP +_read_significant_coefficients12 PROC +SigCoefFunction plus_one_clip4, 12, 12, 63 +_read_significant_coefficients12 ENDP +_read_significant_coefficients13 PROC +SigCoefFunction plus_one_clip4, 13, 13, 31 +_read_significant_coefficients13 ENDP +_read_significant_coefficients14 PROC +SigCoefFunction plus_one_clip4, 13, 13, 31 +_read_significant_coefficients14 ENDP +_read_significant_coefficients15 PROC +SigCoefFunction plus_one_clip4, 14, 14, 15 +_read_significant_coefficients15 ENDP +_read_significant_coefficients16 PROC +SigCoefFunction plus_one_clip4, 16, 16, 15 +_read_significant_coefficients16 ENDP +_read_significant_coefficients17 PROC +SigCoefFunction plus_one_clip4, 17, 17, 14 +_read_significant_coefficients17 ENDP +_read_significant_coefficients18 PROC +SigCoefFunction plus_one_clip4, 18, 18, 63 +_read_significant_coefficients18 ENDP +_read_significant_coefficients19 PROC +SigCoefFunction plus_one_clip4, 19, 19, 31 +_read_significant_coefficients19 ENDP 
+_read_significant_coefficients20 PROC +SigCoefFunction plus_one_clip4, 19, 19, 31 +_read_significant_coefficients20 ENDP +_read_significant_coefficients21 PROC +SigCoefFunction plus_one_clip4, 20, 20, 15 +_read_significant_coefficients21 ENDP +_TEXT ENDS + + +; +; push eax ; currSlice->coeff +; push ecx ; tex_ctx +; edi is NOT preserved +; pass currMB in edi +; pass dep in ebp +; pass type in ebx +; on return, edi contains coeff, edx contains dep + + +SigMapFunction MACRO PosCtxMap, TypeCtxLast, IsDC, MaxPos, PosCtxLast, TypeCtxMap, Func +last_ctx$ = 24 ; local variable (cheating and using stack space from _readRunLevel_CABAC) +coeff_ctr$ = 28 ; local variable (cheating and using stack space from _readRunLevel_CABAC) + STACKOFFSET=0 + mov edx, DWORD PTR [edi+p_Vid@Macroblock] ; edx: p_Vid + push esi + xor esi, esi + STACKOFFSET=STACKOFFSET+4 + mov edx, DWORD PTR [edx+structure@VideoParameters] + add edx, DWORD PTR [edi+mb_field@Macroblock] ; currMB->mb_field + mov edi, eax ; edi: coeff + mov eax, 1408 ; 16 * 22 * sizeof(BiContextType) + cmovz eax, esi + mov edx, OFFSET PosCtxMap + cmovnz edx, DWORD PTR _pos2ctx_map_int[ebx*4] + IF IsDC EQ 0 + lea ebx, [edx + 1] + ELSE + mov ebx, edx ; pos2ctx_Map = (fld) ? 
pos2ctx_map_int[type] : pos2ctx_map[type]; + ENDIF + mov edx, ebp + lea ebp, [eax+ecx+TypeCtxMap*64+map_contexts@TextureInfoContexts] ; map_ctx = tex_ctx->map_contexts[fld][type2ctx_map [type]]; + lea ecx, DWORD PTR [eax+ecx+TypeCtxLast*64+last_contexts@TextureInfoContexts] + mov DWORD PTR last_ctx$[esp+STACKOFFSET], ecx ; last_ctx = tex_ctx->last_contexts[fld][type2ctx_last[type]]; + mov DWORD PTR coeff_ctr$[esp+STACKOFFSET], esi; coeff_ctr = 0 + ;jne LOOP_AGAIN + + ; esi: i + ; ebx: i1 (loop end) + ; ebp: dep_dp + ; edi: coeff +; for (i=i0; i < i1; ++i) // if last coeff is reached, it has to be significant +LOOP_AGAIN: + +; --- read significance symbol --- +; if (biari_decode_symbol (dep_dp, map_ctx + pos2ctx_Map[i])) + + movzx eax, BYTE PTR [esi+ebx] + lea eax, DWORD PTR [ebp+eax*4] + call _biari_decode_symbol_map + test eax, eax + mov WORD PTR [edi+esi*2], ax ; coeff[i] = biari_decode_symbol() + je SHORT LOOP_ITR + +; --- read last coefficient symbol --- +; if (biari_decode_symbol (dep_dp, last_ctx + last[i])) + + inc DWORD PTR coeff_ctr$[esp+STACKOFFSET] ; coeff_ctr++ + IF IsDC EQ 0 + movzx ecx, BYTE PTR PosCtxLast[esi+1] + ELSE + movzx ecx, BYTE PTR PosCtxLast[esi] + ENDIF + mov eax, DWORD PTR last_ctx$[esp+STACKOFFSET] + lea eax, DWORD PTR [eax+ecx*4] + call _biari_decode_symbol_map + test eax, eax + je SHORT LOOP_ITR + + mov eax, DWORD PTR coeff_ctr$[esp+STACKOFFSET]; return coeff_ctr; + mov ecx, DWORD PTR [esp] + mov ebp, DWORD PTR [ecx+tex_ctx@Slice] ; ; edx: currSlice->tex_ctx + mov DWORD PTR [ecx+coeff_ctr@Slice], eax ; currSlice->coeff_ctr = return value (read_significance_map) + jmp Func + align 16 +LOOP_ITR: + inc esi + cmp esi, MaxPos + jl SHORT LOOP_AGAIN + mov eax, DWORD PTR coeff_ctr$[esp+STACKOFFSET] + mov WORD PTR [edi+esi*2], 1 + inc eax + mov ecx, DWORD PTR [esp] + mov ebp, DWORD PTR [ecx+tex_ctx@Slice] ; ; edx: currSlice->tex_ctx + mov DWORD PTR [ecx+coeff_ctr@Slice], eax ; currSlice->coeff_ctr = return value (read_significance_map) + jmp 
Func +ENDM + +_TEXT SEGMENT +_read_significance_map0 PROC +SigMapFunction _pos2ctx_map4x4, 0, 1, 15, _pos2ctx_last4x4, 0, _read_significant_coefficients0 +_read_significance_map0 ENDP +_read_significance_map1 PROC +SigMapFunction _pos2ctx_map4x4, 1, 0, 14, _pos2ctx_last4x4, 1, _read_significant_coefficients1 +_read_significance_map1 ENDP +_read_significance_map2 PROC +SigMapFunction _pos2ctx_map8x8, 2, 1, 63, _pos2ctx_last8x8, 2, _read_significant_coefficients2 +_read_significance_map2 ENDP +_read_significance_map3 PROC +SigMapFunction _pos2ctx_map8x4, 3, 1, 31, _pos2ctx_last8x4, 3, _read_significant_coefficients3 +_read_significance_map3 ENDP +_read_significance_map4 PROC +SigMapFunction _pos2ctx_map8x4, 4, 1, 31, _pos2ctx_last8x4, 4, _read_significant_coefficients4 +_read_significance_map4 ENDP +_read_significance_map5 PROC +SigMapFunction _pos2ctx_map4x4, 5, 1, 15, _pos2ctx_last4x4, 5, _read_significant_coefficients5 +_read_significance_map5 ENDP +_read_significance_map6 PROC +SigMapFunction _pos2ctx_map4x4, 6, 1, 3, _pos2ctx_last4x4, 6, _read_significant_coefficients6 +_read_significance_map6 ENDP +_read_significance_map7 PROC +SigMapFunction _pos2ctx_map4x4, 7, 0, 14, _pos2ctx_last4x4, 7, _read_significant_coefficients7 +_read_significance_map7 ENDP +_read_significance_map8 PROC +SigMapFunction _pos2ctx_map2x4c, 6, 1, 7, _pos2ctx_last2x4c, 6, _read_significant_coefficients8 +_read_significance_map8 ENDP +_read_significance_map9 PROC +SigMapFunction _pos2ctx_map4x4c, 6, 1, 15, _pos2ctx_last4x4c, 6, _read_significant_coefficients9 +_read_significance_map9 ENDP +_read_significance_map10 PROC +SigMapFunction _pos2ctx_map4x4, 10, 1, 15, _pos2ctx_last4x4, 10, _read_significant_coefficients10 +_read_significance_map10 ENDP +_read_significance_map11 PROC +SigMapFunction _pos2ctx_map4x4, 11, 0, 14, _pos2ctx_last4x4, 11, _read_significant_coefficients11 +_read_significance_map11 ENDP +_read_significance_map12 PROC +SigMapFunction _pos2ctx_map8x8, 12, 1, 63, 
_pos2ctx_last8x8, 12, _read_significant_coefficients12 +_read_significance_map12 ENDP +_read_significance_map13 PROC +SigMapFunction _pos2ctx_map8x4, 13, 1, 31, _pos2ctx_last8x4, 13, _read_significant_coefficients13 +_read_significance_map13 ENDP +_read_significance_map14 PROC +SigMapFunction _pos2ctx_map8x4, 14, 1, 31, _pos2ctx_last8x4, 14, _read_significant_coefficients14 +_read_significance_map14 ENDP +_read_significance_map15 PROC +SigMapFunction _pos2ctx_map4x4, 15, 1, 15, _pos2ctx_last4x4, 15, _read_significant_coefficients15 +_read_significance_map15 ENDP +_read_significance_map16 PROC +SigMapFunction _pos2ctx_map4x4, 16, 1, 15, _pos2ctx_last4x4, 16, _read_significant_coefficients16 +_read_significance_map16 ENDP +_read_significance_map17 PROC +SigMapFunction _pos2ctx_map4x4, 17, 0, 14, _pos2ctx_last4x4, 17, _read_significant_coefficients17 +_read_significance_map17 ENDP +_read_significance_map18 PROC +SigMapFunction _pos2ctx_map8x8, 18, 1, 63, _pos2ctx_last8x8, 18, _read_significant_coefficients18 +_read_significance_map18 ENDP +_read_significance_map19 PROC +SigMapFunction _pos2ctx_map8x4, 19, 1, 31, _pos2ctx_last8x4, 19, _read_significant_coefficients19 +_read_significance_map19 ENDP +_read_significance_map20 PROC +SigMapFunction _pos2ctx_map8x4, 20, 1, 31, _pos2ctx_last8x4, 20, _read_significant_coefficients20 +_read_significance_map20 ENDP +_read_significance_map21 PROC +SigMapFunction _pos2ctx_map4x4, 21, 1, 15, _pos2ctx_last4x4, 21, _read_significant_coefficients21 +_read_significance_map21 ENDP +_TEXT ENDS + + +_TEXT SEGMENT +; edx: dep - unchanged by function +; SF holds the return value +_biari_decode_symbol_eq_prob_asm PROC + mov ecx, DWORD PTR [edx+8]; dep->DbitsLeft + dec ecx ; dep->DbitsLeft-- + mov eax, DWORD PTR [edx+4] ; eax: dep->DValue + push esi + jnz SHORT $LN3@biari_deco; if(--(dep->DbitsLeft) == 0) + + mov ecx, DWORD PTR [edx+16] ; ebp: dep->Dcodestrm_len + mov esi, DWORD PTR [ecx] ; esi: *dep->Dcodestrm_len + add DWORD PTR [ecx], 2 ; 
*dep->Dcodestrm_len += 2 + mov ecx, DWORD PTR [edx+12] ; ebp: dep->Dcodestrm + shl eax, 16 + mov ax, WORD PTR [ecx+esi] ; value = (value << 16) | getword( dep ) + xchg ah, al + mov ecx, 16 ; dep->DbitsLeft = 16; +$LN3@biari_deco: + mov esi, DWORD PTR [edx] ; dep->Drange + shl esi, cl ; (dep->Drange << dep->DbitsLeft) + mov DWORD PTR [edx+8], ecx + mov ecx, eax + sub ecx, esi + pop esi + + cmovns eax, ecx ; if (tmp_value <0) value = tmp_value + mov DWORD PTR [edx+4], eax ; dep->Dvalue = value; + ret 0 +_biari_decode_symbol_eq_prob_asm ENDP +_TEXT ENDS + +_TEXT SEGMENT +; edx: dep. retained on return +; esi and ebp are NOT retained, because the (only) calling function doesn't need them to be +_exp_golomb_decode_eq_prob0 PROC +STACKOFFSET=0 + xor esi, esi ; esi: binary_symbol + xor ebp, ebp ; ebp: symbol + push edi + mov edi, 1 ; edi: k +DECODE_EQ@LOOP_AGAIN: + call _biari_decode_symbol_eq_prob_asm ; l = biari_decode_symbol_eq_prob(dep_dp); + js SHORT DECODE_EQ@LOOP_DONE + add ebp, edi ; symbol += k + shl edi, 1 ; k <<= 1 + jmp SHORT DECODE_EQ@LOOP_AGAIN + align 16 +DECODE_EQ@LOOP_DONE: + shr edi, 1 + jz SHORT DECODE_EQ@RETURN + call _biari_decode_symbol_eq_prob_asm ; if (biari_decode_symbol_eq_prob(dep_dp)==1) + js SHORT DECODE_EQ@LOOP_DONE + or esi, edi ; binary_symbol |= (1<<k); + jmp SHORT DECODE_EQ@LOOP_DONE + align 16 +DECODE_EQ@RETURN: + lea eax, DWORD PTR [esi+ebp+13] ; return (unsigned int) (symbol + binary_symbol); + pop edi + ret 0 +_exp_golomb_decode_eq_prob0 ENDP +_TEXT ENDS + +; +; +; pass dep in edx, context in eax +; edx is retained on return +; ebp is destroyed + +_TEXT SEGMENT +ctx = 4 ; second parameter +_unary_exp_golomb_level_decode PROC + STACKOFFSET=0 + mov ebp, eax ; eax (and now ebp also) contains the context pointer + call _biari_decode_symbol_map + test eax, eax ; if (symbol==0) + jne SHORT SYMBOL_NOT_ZERO + ret 0 +align 16 +SYMBOL_NOT_ZERO: + push esi + xor esi, esi +LEVEL_DECODE@LOOP_AGAIN: + + mov eax, ebp ; _biari_decode_symbol_map wants 
ctx in eax + inc esi ; ++symbol; + call _biari_decode_symbol_map ; l = biari_decode_symbol(dep_dp, ctx); + + test eax, eax ; if (!l) + je SHORT LEVEL_IS_ZERO + cmp esi, 12 ; exp_start-1 + jb SHORT LEVEL_DECODE@LOOP_AGAIN + + call _exp_golomb_decode_eq_prob0 ; exp_golomb_decode_eq_prob(dep_dp,0) + pop esi + ret 0 +align 16 +LEVEL_IS_ZERO: + mov eax, esi ; return symbol; + pop esi + ret 0 +_unary_exp_golomb_level_decode ENDP +_TEXT ENDS + +CONST SEGMENT +sigmap_functions DD FLAT:_read_significance_map0 +DD FLAT:_read_significance_map1 +DD FLAT:_read_significance_map2 +DD FLAT:_read_significance_map3 +DD FLAT:_read_significance_map4 +DD FLAT:_read_significance_map5 +DD FLAT:_read_significance_map6 +DD FLAT:_read_significance_map7 +DD FLAT:_read_significance_map8 +DD FLAT:_read_significance_map9 +DD FLAT:_read_significance_map10 +DD FLAT:_read_significance_map11 +DD FLAT:_read_significance_map12 +DD FLAT:_read_significance_map13 +DD FLAT:_read_significance_map14 +DD FLAT:_read_significance_map15 +DD FLAT:_read_significance_map16 +DD FLAT:_read_significance_map17 +DD FLAT:_read_significance_map18 +DD FLAT:_read_significance_map19 +DD FLAT:_read_significance_map20 +DD FLAT:_read_significance_map21 +CONST ENDS + +PUBLIC _readRunLevel_CABAC +_TEXT SEGMENT +_currMB$ = 4 ; first parameter +_dep_dp$ = 8 ; second parameter +_context$ = 12 ; third parameter +_readRunLevel_CABAC PROC + push esi + push edi +STACKOFFSET=8 + mov edi, DWORD PTR _currMB$[esp+STACKOFFSET] ; edi: currMB + mov esi, DWORD PTR [edi] ; esi: currSlice = currMB->p_Slice; + + cmp DWORD PTR [esi+coeff_ctr@Slice], 0 ; if (currSlice->coeff_ctr >= 0) + jge SHORT SET_RUN_AND_LEVEL + +; ===== decode CBP-BIT ===== + mov eax, DWORD PTR [edi+read_and_store_CBP_block_bit@Macroblock] ; eax: currMB->read_and_store_CBP_block_bit + push ebx +STACKOFFSET=STACKOFFSET+4 + mov ebx, DWORD PTR _context$[esp+STACKOFFSET] ; ebx: context + push ebp +STACKOFFSET=STACKOFFSET+4 + mov ebp, DWORD PTR _dep_dp$[esp+STACKOFFSET] ; ebp: dep 
+ push ebx ; context + push ebp ; dep + push edi ; currMB + call eax ; currMB->read_and_store_CBP_block_bit(currMB, dep_dp, context) + add esp, 12 + mov DWORD PTR [esi+coeff_ctr@Slice], eax ; currSlice->coeff_ctr = return value + test eax, eax ; if (currSlice->coeff_ctr == 0) + je SHORT SET_RUN_AND_LEVEL_POP + +; ===== decode significance coefficients ===== + mov ecx, DWORD PTR [esi+tex_ctx@Slice] ; ecx: currSlice->tex_ctx + lea eax, DWORD PTR [esi+coeff@Slice] ; eax: currSlice->coeff + ;push eax ; currSlice->coeff + ;push ecx ; tex_ctx + ;call _read_significance_map ; read_significance_map(currSlice->tex_ctx, currMB, dep_dp, context, currSlice->coeff); + call sigmap_functions[ebx*4] +SET_RUN_AND_LEVEL_POP: + pop ebp + pop ebx +STACKOFFSET=STACKOFFSET-8 +SET_RUN_AND_LEVEL: + +; --- set run and level --- + xor edx, edx ; edx: 0 + + dec DWORD PTR [esi+coeff_ctr@Slice] ; if (currSlice->coeff_ctr--) + js SHORT EOB + +; --- set run and level (coefficient) --- + mov ecx, DWORD PTR [esi+pos@Slice] ; ecx: currSlice->pos + xor edi, edi ; edi: run=0 + cmp WORD PTR [esi+ecx*2+coeff@Slice], dx ; currSlice->coeff[currSlice->pos] == 0 + jne SHORT LOOP_END +LOOP_ITR: + cmp WORD PTR [esi+ecx*2+1+coeff@Slice], dx ; currSlice->coeff[currSlice->pos] == 0 + lea ecx, [ecx+1] + lea edi, [edi+1] + je SHORT LOOP_ITR +LOOP_END: + movsx eax, WORD PTR [esi+ecx*2+coeff@Slice] ; eax: value = currSlice->coeff[currSlice->pos] + inc ecx ; currSlice->pos++ + +; --- decrement coefficient counter and re-set position --- + + ;cmp DWORD PTR [esi+coeff_ctr@Slice], edx ; if (currSlice->coeff_ctr == 0) + ;cmove ecx, edx ; currSlice->pos = 0 + mov edx, edi + pop edi + mov DWORD PTR [esi+pos@Slice], ecx ; store currSlice->pos + pop esi + ret 0 ; eax contains value + align 16 +EOB: + xor eax, eax ; return 0 + mov DWORD PTR [esi+pos@Slice], edx ; currSlice->pos = 0; + pop edi + pop esi + ret 0 +_readRunLevel_CABAC ENDP +_TEXT ENDS + +; +; edi is not saved +; pass dep_dp in edx, retained on exit +; pass ctx 
in edi +; return value in esi + +PUBLIC _unary_exp_golomb_mv_decode3 +_TEXT SEGMENT +_ctx$ = 4 ; second parameter +_unary_exp_golomb_mv_decode3 PROC +STACKOFFSET=0 + mov eax, edi + call _biari_decode_symbol_map ; pass dep in edx and ctx in eax. edx retains dep on exit + test eax, eax ; if (symbol) + jne SHORT SYMBOL_NOT_ZERO + xor esi, esi + ret 0 + align 16 +SYMBOL_NOT_ZERO: + push ebp +STACKOFFSET=STACKOFFSET+4 + mov ebp, 3 + add edi, 4 ; ctx++ + mov esi, 1 ; esi: symbol +LOOP_START: + mov eax, edi + call _biari_decode_symbol_map ; pass dep in edx and ctx in eax. edx retains dep on exit + test eax, eax + je SHORT SYMBOL_ZERO_RETURN + + inc esi + cmp esi, 2 ; if (symbol == 2) + sete al ; eax will be 1, so this is safe to do + lea edi, [edi + eax*4] ; ctx += (symbol == 2) + + cmp esi, ebp ; if (symbol == max_bin) + sete al ; eax will have nothing set high, so this is safe to do + lea edi, [edi + eax*4] ; ctx += (symbol != max_bin) + + cmp esi, 8 ; if (symbol < exp_start) + jb SHORT LOOP_START + +; return exp_start + exp_golomb_decode_eq_prob(dep_dp,3); + xor ebp, ebp ; ebp: symbol + mov edi, ebp ; edi: binary_symbol +DECODE_EQ3@LOOP1: + call _biari_decode_symbol_eq_prob_asm ; edx holds dep_dp + js SHORT DECODE_EQ3@LOOP2 + or ebp, esi; symbol += (l<<k) + shl esi, 1 ; k <<= 1 + jmp SHORT DECODE_EQ3@LOOP1 + align 16 +DECODE_EQ3@LOOP2: + shr esi, 1 + jz SHORT DECODE_EQ3@RETURN + call _biari_decode_symbol_eq_prob_asm + js SHORT DECODE_EQ3@LOOP2 + or edi, esi ; binary_symbol |= (1<<k); + jmp SHORT DECODE_EQ3@LOOP2 + align 16 +DECODE_EQ3@RETURN: + ; return (unsigned int) (symbol + binary_symbol); + lea esi, [edi+ebp+8] + pop ebp + ret 0 + align 16 +SYMBOL_ZERO_RETURN: + ; return symbol is in esi + pop ebp + ret 0 +_unary_exp_golomb_mv_decode3 ENDP +_TEXT ENDS + +_TEXT SEGMENT +_unary_bin_decode1 PROC +; _ctx$ = eax +; _dep_dp$ = edx + push edi + mov edi, eax + call _biari_decode_symbol_map ; biari_decode_symbol(dep_dp, ctx ); + test eax, eax ; if (symbol) + jne SHORT 
$LN5@unary_bin_@2 + mov eax, 2 + shr eax, 1 + pop edi + ret 0 +align 16 +$LN5@unary_bin_@2: + xor esi, esi ; symbol = 0; +$LL3@unary_bin_@2: + inc esi ; ++symbol; + lea eax, DWORD PTR [edi+4] ; ctx + ctx_offset + call _biari_decode_symbol_map ; biari_decode_symbol(dep_dp, ctx); + test eax, eax ; while( l != 0 ); + jne SHORT $LL3@unary_bin_@2 + lea eax, [esi + 2]; return symbol+2; + shr eax, 1 + pop edi + ret 0 +_unary_bin_decode1 ENDP +_TEXT ENDS + + +PUBLIC _readDquant_CABAC +_TEXT SEGMENT +_currSlice$ = 4 ; first parameter +_dep_dp$ = 8 ; second parameter +_readDquant_CABAC PROC +STACKOFFSET=0 +; 815 : MotionInfoContexts *ctx = currSlice->mot_ctx; +; 816 : short dquant; +; 817 : int act_ctx = ((currSlice->last_dquant != 0) ? 1 : 0); +; 818 : int act_sym = biari_decode_symbol(dep_dp,ctx->delta_qp_contexts + act_ctx ); + + mov edx, DWORD PTR _dep_dp$[esp+STACKOFFSET] + push esi + push edi +STACKOFFSET = STACKOFFSET + 8 + mov edi, DWORD PTR _currSlice$[esp+STACKOFFSET] + mov esi, DWORD PTR [edi+mot_ctx@Slice] + xor eax, eax + cmp DWORD PTR [edi+last_dquant@Slice], eax + setne al + lea eax, DWORD PTR [esi+eax*4+332] + ; pass dep in edx and ctx in eax. 
edx retains dep on exit + call _biari_decode_symbol_map + + test eax, eax ; if (!act_sym) + jz SHORT $LN2@readDquant + + lea eax, DWORD PTR [esi+340] ; unary_bin_decode(dep_dp,ctx->delta_qp_contexts + 2,1); + call _unary_bin_decode1 + + jnc SHORT $LN2@readDquant ; lsb is signed bit + + neg eax ; dquant = -dquant; + movzx eax, ax +$LN2@readDquant: + movsx edx, ax + mov DWORD PTR [edi+last_dquant@Slice], edx ; currSlice->last_dquant = dquant; + pop edi + pop esi + ;mov ax, cx ; return dquant; + ret 0 +_readDquant_CABAC ENDP +_TEXT ENDS + +PUBLIC _readIntraPredMode_CABAC +_TEXT SEGMENT +_currSlice$ = 4 ; first parameter +_dep_dp$ = 8 ; second parameter +_readIntraPredMode_CABAC PROC +; 720 : TextureInfoContexts *ctx = +STACKOFFSET=0 + mov eax, DWORD PTR _currSlice$[esp + STACKOFFSET] + push esi + mov esi, DWORD PTR [eax+100] ; currSlice->tex_ctx; +STACKOFFSET=4 +; 721 : int act_sym; +; 722 : +; 723 : // use_most_probable_mode +; 724 : act_sym = biari_decode_symbol(dep_dp, ctx->ipr_contexts); + + mov edx, DWORD PTR _dep_dp$[esp+STACKOFFSET] + lea eax, DWORD PTR [esi+12] + call _biari_decode_symbol_map + + ; remaining_mode_selector + test eax, eax ; if (act_sym == 0) + jz SHORT $LN2@readIntraP + + or eax, -1 ; return -1; + pop esi + ret 0 +align 16 +$LN2@readIntraP: + push ebx + add esi, 16 ; 00000010H + mov eax, esi + call _biari_decode_symbol_map + mov ebx, eax +; 735 : pred_mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 1); + + mov eax, esi + call _biari_decode_symbol_map + lea ebx, [ebx+2*eax] +; 736 : pred_mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 2); + + mov eax, esi + call _biari_decode_symbol_map + lea eax, [ebx+4*eax] ; return pred_mode; + + pop ebx + pop esi + ret 0 +_readIntraPredMode_CABAC ENDP +_TEXT ENDS + +PUBLIC _readMB_skip_flagInfo_CABAC +_TEXT SEGMENT +_currMB$ = 4 ; first parameter +_dep_dp$ = 12 ; size = 4 +_readMB_skip_flagInfo_CABAC PROC + +; 406 : Slice *currSlice = currMB->p_Slice; +STACKOFFSET=0 + mov ecx, DWORD 
PTR _currMB$[esp + STACKOFFSET] + push ebp + + xor eax, eax + push esi + mov esi, DWORD PTR [ecx + p_Slice@Macroblock] ; esi: currSlice + cmp DWORD PTR [esi+slice_type@Slice], 1 ; int bframe=(currSlice->slice_type == B_SLICE); + push edi + + mov edi, DWORD PTR [esi+mot_ctx@Slice] ; edi: ctx = currSlice->mot_ctx; + sete al ; int bframe=(currSlice->slice_type == B_SLICE); + +; 409 : int a = (currMB->mb_left != NULL) ? (currMB->mb_left->skip_flag == 0) : 0; + + xor edx, edx + mov ebp, eax + mov eax, DWORD PTR [ecx+104] + test eax, eax + je SHORT READ_B + cmp DWORD PTR [eax+348], edx + sete dl + +; 410 : int b = (currMB->mb_up != NULL) ? (currMB->mb_up ->skip_flag == 0) : 0; + +READ_B: + mov ecx, DWORD PTR [ecx+100] + xor eax, eax + test ecx, ecx + je SHORT $LN9@readMB_ski + cmp DWORD PTR [ecx+348], eax + sete al +$LN9@readMB_ski: + +; 414 : if (bframe) +; 415 : { +; 416 : act_ctx = 7 + a + b; +; 418 : skip = biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][act_ctx]); + + add eax, edx + test ebp, ebp + mov edx, DWORD PTR _dep_dp$[esp+8] + je SHORT $LN3@readMB_ski + lea eax, DWORD PTR [edi+eax*4+116] + jmp SHORT $LN11@readMB_ski +align 16 +$LN3@readMB_ski: + +; 422 : act_ctx = a + b; +; 424 : skip = biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][act_ctx]); + + lea eax, DWORD PTR [edi+eax*4+44] +$LN11@readMB_ski: + call _biari_decode_symbol_map + + test eax, eax ; if (!skip) + je SHORT $LN1@readMB_ski + +; 429 : currSlice->last_dquant = 0; + + mov DWORD PTR [esi + last_dquant@Slice], 0 +$LN1@readMB_ski: + pop edi + pop esi + pop ebp + ret 0 +_readMB_skip_flagInfo_CABAC ENDP +_TEXT ENDS + + +PUBLIC _set_chroma_qp +_TEXT SEGMENT +_currMB$ = 4 ; first parameter +_set_chroma_qp PROC + mov eax, DWORD PTR _currMB$[esp] ; eax: currMB + mov ecx, DWORD PTR [eax+4] ; ecx: currMB->p_Vid + mov edx, DWORD PTR [ecx+bitdepth_chroma_qp_scale@VideoParameters] ; edx: p_Vid->bitdepth_chroma_qp_scale; + push edi + mov edi, DWORD PTR [ecx+dec_picture@VideoParameters] ; edi: 
p_Vid->dec_picture + mov ecx, DWORD PTR [edi+chroma_qp_offset@StorablePicture] ; ecx: dec_picture->chroma_qp_offset[0] + add ecx, DWORD PTR [eax+qp@macroblock] ; ecx: dec_picture->chroma_qp_offset[0] + currMB->qp + neg edx ; edx: -p_Vid->bitdepth_chroma_qp_scale; + cmp ecx, edx + cmovl ecx, edx + cmp ecx, 51 + cmovg ecx, DWORD PTR _51 ; cmov doesn't allow for immediates + test ecx, ecx + cmovge ecx, DWORD PTR _QP_SCALE_CR[ecx*4] + mov DWORD PTR [eax+64], ecx + + sub ecx, edx; currMB->qpc[0] + p_Vid->bitdepth_chroma_qp_scale; + mov DWORD PTR [eax+qp_scaled@Macroblock + 4], ecx ; currMB->qp_scaled[1] + mov ecx, DWORD PTR [edi+chroma_qp_offset@StorablePicture + 4] + add ecx, DWORD PTR [eax+qp@macroblock] + cmp ecx, edx + cmovl ecx, edx + cmp ecx, 51 + cmovg ecx, DWORD PTR _51 ; cmov doesn't allow for immediates + test ecx, ecx + cmovge ecx, DWORD PTR _QP_SCALE_CR[ecx*4] + mov DWORD PTR [eax+64+4], ecx + sub ecx, edx + pop edi + mov DWORD PTR [eax+72 + 8], ecx + ret 0 +_set_chroma_qp ENDP +_TEXT ENDS + +PUBLIC _decodeMVD_CABAC +_TEXT SEGMENT +_dep_dp$ = 4 ; first parameter +_mv_ctx$ = 8 ; second parameter +_act_ctx$ = 12; third parameter +_err$ = 16 ; 4th parameter +_decodeMVD_CABAC PROC +STACKOFFSET = 0 + mov eax, DWORD PTR _act_ctx$[esp+STACKOFFSET] + push edi +STACKOFFSET = STACKOFFSET + 4 + mov edi, DWORD PTR _mv_ctx$[esp+STACKOFFSET] + lea edi, [edi+eax*4] ; mv_ctx[0][act_ctx] + mov eax, DWORD PTR _err$[esp+STACKOFFSET] + lea eax, DWORD PTR [edi+eax*4] ; &mv_ctx[0][act_ctx+err] + mov edx, DWORD PTR _dep_dp$[esp+STACKOFFSET] + call _biari_decode_symbol_map ; int act_sym = biari_decode_symbol(dep_dp,&mv_ctx[0][act_ctx+err] ); + + test eax, eax ; if (act_sym != 0) + je SHORT SYMBOL_ZERO + push esi +STACKOFFSET = STACKOFFSET + 4 + lea edi, [edi + 40] ; mv_ctx[1]+act_ctx + call _unary_exp_golomb_mv_decode3 ; act_sym = unary_exp_golomb_mv_decode3(dep_dp,mv_ctx[1]+act_ctx); + inc esi ; ++act_sym; + call _biari_decode_symbol_eq_prob_asm ; mv_sign = 
biari_decode_symbol_eq_prob(dep_dp); + js SHORT SKIP_NEGATE; if(mv_sign) + neg esi ; act_sym = -act_sym; +SKIP_NEGATE: + mov eax, esi + pop esi +SYMBOL_ZERO: + pop edi + ret 0 +_decodeMVD_CABAC ENDP +_TEXT ENDS + +END + diff --git a/Src/h264dec/ldecod/src/biaridecod.c b/Src/h264dec/ldecod/src/biaridecod.c new file mode 100644 index 00000000..8b1d44f3 --- /dev/null +++ b/Src/h264dec/ldecod/src/biaridecod.c @@ -0,0 +1,322 @@ +/*! + ************************************************************************************* + * \file biaridecod.c + * + * \brief + * Binary arithmetic decoder routines. + * + * This modified implementation of the M Coder is based on JVT-U084 + * with the choice of M_BITS = 16. + * + * \date + * 21. Oct 2000 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe <marpe@hhi.de> + * - Gabi Blaettermann + * - Gunnar Marten + ************************************************************************************* + */ + +#include "global.h" +#include "memalloc.h" +#include "biaridecod.h" + + +#define B_BITS 10 // Number of bits to represent the whole coding interval +#define HALF 0x01FE //(1 << (B_BITS-1)) - 2 +#define QUARTER 0x0100 //(1 << (B_BITS-2)) + + +/************************************************************************ + ************************************************************************ + init / exit decoder + ************************************************************************ + ************************************************************************/ + + +/*! 
+ ************************************************************************ + * \brief + * Allocates memory for the DecodingEnvironment struct + * \return DecodingContextPtr + * allocates memory + ************************************************************************ + */ +DecodingEnvironmentPtr arideco_create_decoding_environment() +{ + DecodingEnvironmentPtr dep; + + if ((dep = calloc(1,sizeof(DecodingEnvironment))) == NULL) + no_mem_exit("arideco_create_decoding_environment: dep"); + return dep; +} + + +/*! + *********************************************************************** + * \brief + * Frees memory of the DecodingEnvironment struct + *********************************************************************** + */ +void arideco_delete_decoding_environment(DecodingEnvironmentPtr dep) +{ + if (dep == NULL) + { + snprintf(errortext, ET_SIZE, "Error freeing dep (NULL pointer)"); + error (errortext, 200); + } + else + free(dep); +} + +/*! + ************************************************************************ + * \brief + * finalize arithetic decoding(): + ************************************************************************ + */ +void arideco_done_decoding(DecodingEnvironmentPtr dep) +{ + (*dep->Dcodestrm_len)++; +#if(TRACE==2) + fprintf(p_trace, "done_decoding: %d\n", *dep->Dcodestrm_len); +#endif +} + +/*! + ************************************************************************ + * \brief + * read one byte from the bitstream + ************************************************************************ + */ +unsigned int getbyte(DecodingEnvironmentPtr dep) +{ +#if(TRACE==2) + fprintf(p_trace, "get_byte: %d\n", (*dep->Dcodestrm_len)); +#endif + return dep->Dcodestrm[(*dep->Dcodestrm_len)++]; +} + +/*! 
+ ************************************************************************ + * \brief + * read two bytes from the bitstream + ************************************************************************ + */ + +static unsigned int getword(DecodingEnvironmentPtr dep) +{ + int d = *dep->Dcodestrm_len; + *dep->Dcodestrm_len += 2; + return ((dep->Dcodestrm[d]<<8) | dep->Dcodestrm[d+1]); +} + +/*! + ************************************************************************ + * \brief + * Initializes the DecodingEnvironment for the arithmetic coder + ************************************************************************ + */ +void arideco_start_decoding(DecodingEnvironmentPtr dep, unsigned char *code_buffer, + int firstbyte, int *code_len) +{ + + dep->Dcodestrm = code_buffer; + dep->Dcodestrm_len = code_len; + *dep->Dcodestrm_len = firstbyte; + + dep->Dvalue = getbyte(dep); + dep->Dvalue = (dep->Dvalue << 16) | getword(dep); // lookahead of 2 bytes: always make sure that bitstream buffer + // contains 2 more bytes than actual bitstream + dep->DbitsLeft = 15; + dep->Drange = HALF; + +#if (2==TRACE) + fprintf(p_trace, "value: %d firstbyte: %d code_len: %d\n", dep->Dvalue >> dep->DbitsLeft, firstbyte, *code_len); +#endif +} + + + + +/*! +************************************************************************ +* \brief +* biari_decode_symbol(): +* \return +* the decoded symbol +************************************************************************ +*/ +/* random notes +max rLPS = 240 1111 1 111 +max state = 63 +max renorm = 6, min 1 +max bitsleft = 16 +max range = (1<<10) ????? 
(1024) +*/ +#if !defined(_M_IX86) || defined(_DEBUG) +unsigned int biari_decode_symbol(DecodingEnvironmentPtr dep, BiContextTypePtr bi_ct ) +{ + unsigned int state = bi_ct->state; + unsigned int bit = bi_ct->MPS; + unsigned int value = dep->Dvalue; + unsigned int range = dep->Drange; + const unsigned int rLPS = rLPS_table_64x4[(range>>6)&3][state]; + + range -= rLPS; + + if(value >= (range << dep->DbitsLeft)) + { // LPS + int renorm; + bi_ct->state = AC_next_state_LPS_64[state]; // next state + value -= (range << dep->DbitsLeft); + bit ^= 0x01; + + //if (!state) // switch meaning of MPS if necessary + // bi_ct->MPS = bit; + bi_ct->MPS ^= !state;//0x01; + + renorm = renorm_table_256[rLPS]; + range = (rLPS << renorm); + + dep->Drange = range; + dep->DbitsLeft -= renorm; + if( dep->DbitsLeft > 0 ) + { + dep->Dvalue = value; + return(bit); + } + + dep->Dvalue = (value << 16) | getword(dep); // lookahead of 2 bytes: always make sure that bitstream buffer + // contains 2 more bytes than actual bitstream + dep->DbitsLeft += 16; + + return(bit); + } + else + { //MPS + bi_ct->state = AC_next_state_MPS_64[state]; // next state + + if( range < QUARTER ) + { + dep->Drange = range << 1; + dep->DbitsLeft -= 1; + if( dep->DbitsLeft > 0 ) + { + return(bit); + } + + dep->Dvalue = (value << 16) | getword(dep); // lookahead of 2 bytes: always make sure that bitstream buffer + // contains 2 more bytes than actual bitstream + dep->DbitsLeft += 16; + + return(bit); + } + else + { + dep->Drange = range; + return (bit); + } + } + +} +#endif +/*! 
+ ************************************************************************ + * \brief + * biari_decode_symbol_eq_prob(): + * \return + * the decoded symbol + ************************************************************************ + */ +unsigned int biari_decode_symbol_eq_prob(DecodingEnvironmentPtr dep) +{ + int tmp_value; + int value = dep->Dvalue; + + if(--(dep->DbitsLeft) == 0) + { + value = (value << 16) | getword( dep ); // lookahead of 2 bytes: always make sure that bitstream buffer + // contains 2 more bytes than actual bitstream + dep->DbitsLeft = 16; + } + tmp_value = value - (dep->Drange << dep->DbitsLeft); + + if (tmp_value < 0) + { + dep->Dvalue = value; + return 0; + } + else + { + dep->Dvalue = tmp_value; + return 1; + } +} + +/*! + ************************************************************************ + * \brief + * biari_decode_symbol_final(): + * \return + * the decoded symbol + ************************************************************************ + */ +unsigned int biari_decode_final(DecodingEnvironmentPtr dep) +{ + unsigned int range = dep->Drange - 2; + int value = dep->Dvalue; + value -= (range << dep->DbitsLeft); + + if (value < 0) + { + if( range >= QUARTER ) + { + dep->Drange = range; + return 0; + } + else + { + dep->Drange = (range << 1); + if( --(dep->DbitsLeft) > 0 ) + return 0; + else + { + dep->Dvalue = (dep->Dvalue << 16) | getword( dep ); // lookahead of 2 bytes: always make sure that bitstream buffer + // contains 2 more bytes than actual bitstream + dep->DbitsLeft = 16; + return 0; + } + } + } + else + { + return 1; + } +} + +/*! 
+ ************************************************************************ + * \brief + * Initializes a given context with some pre-defined probability state + ************************************************************************ + */ +void biari_init_context (int qp, BiContextTypePtr ctx, const char* ini) +{ + int pstate = ((ini[0]* qp )>>4) + ini[1]; + + if ( pstate >= 64 ) + { + pstate = imin(126, pstate); + ctx->state = (uint16) (pstate - 64); + ctx->MPS = 1; + } + else + { + pstate = imax(1, pstate); + ctx->state = (uint16) (63 - pstate); + ctx->MPS = 0; + } +} + diff --git a/Src/h264dec/ldecod/src/block.c b/Src/h264dec/ldecod/src/block.c new file mode 100644 index 00000000..d048f956 --- /dev/null +++ b/Src/h264dec/ldecod/src/block.c @@ -0,0 +1,929 @@ + +/*! + *********************************************************************** + * \file + * block.c + * + * \brief + * Block functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langoy <inge.lille-langoy@telenor.com> + * - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se> + *********************************************************************** + */ + +#include "contributors.h" + +#include "global.h" +#include "block.h" +#include "image.h" +#include "mb_access.h" +#include "transform.h" +#include "quant.h" +#include "memalloc.h" +#include "optim.h" + + +/*! 
 ****************************************************************************
 * \brief
 *    Inverse 4x4 lossless_qpprime transformation, transforms cof to mb_rres
 ****************************************************************************
 */
void itrans4x4_ls(const h264_short_block_row_t *tblock,
                  const h264_imgpel_macroblock_row_t *mb_pred,
                  h264_imgpel_macroblock_row_t *mb_rec,
                  int ioff, //!< index to 4x4 block
                  int joff) //!< index to 4x4 block
{
  int i,j;
  /* Reconstruction = prediction + residual, clipped with a fixed maximum of 255. */
  for (j = 0; j < BLOCK_SIZE; ++j)
  {
    for (i = 0; i < BLOCK_SIZE; ++i)
    {
      mb_rec[j+joff][i+ioff] = (imgpel) iClip1(255/*max_imgpel_value*/, mb_pred[j+joff][i+ioff] + tblock[j][i]);
    }
  }
}


/*!
************************************************************************
* \brief
*    Inverse residual DPCM for Intra lossless coding
*
************************************************************************
*/
void Inv_Residual_trans_4x4(Macroblock *currMB, //!< current macroblock
                            ColorPlane pl, //!< used color plane
                            int ioff, //!< index to 4x4 block
                            int joff) //!< index to 4x4 block
{
  int i,j;
  h264_short_block_t temp;
  Slice *currSlice = currMB->p_Slice;
  /* Map the pixel offset inside the macroblock to the cof4 sub-block index. */
  int subblock = cof4_pos_to_subblock[joff>>2][ioff>>2];

  h264_short_block_row_t *tblock = currSlice->cof4[pl][subblock];

  if(currMB->ipmode_DPCM == VERT_PRED)
  {
    /* Vertical DPCM: running sum down each column. */
    for(i=0; i<4; ++i)
    {
      temp[0][i] = tblock[0][i];
      temp[1][i] = tblock[1][i] + temp[0][i];
      temp[2][i] = tblock[2][i] + temp[1][i];
      temp[3][i] = tblock[3][i] + temp[2][i];
    }
  }
  else if(currMB->ipmode_DPCM == HOR_PRED)
  {
    /* Horizontal DPCM: running sum along each row. */
    for(j=0; j<4; ++j)
    {
      temp[j][0] = tblock[j][0];
      temp[j][1] = tblock[j][1] + temp[j][0];
      temp[j][2] = tblock[j][2] + temp[j][1];
      temp[j][3] = tblock[j][3] + temp[j][2];
    }
  }
  else
  {
    /* Any other mode: residual is used as-is. */
    for (j = 0; j < BLOCK_SIZE; ++j)
      for (i = 0; i < BLOCK_SIZE; ++i)
        temp[j][i] = tblock[j][i];
  }

  /* Reconstruction = accumulated residual + prediction (no clipping here). */
  for (j = 0; j < BLOCK_SIZE; ++j)
  {
    for (i = 0; i < BLOCK_SIZE; ++i)
    {
      currSlice->mb_rec[pl][j+joff][i+ioff] = (imgpel) (temp[j][i] + currSlice->mb_pred[pl][j+joff][i+ioff]);
    }
  }
}

/*!
************************************************************************
* \brief
*    Inverse residual DPCM for Intra lossless coding
*
* \par Input:
*    ioff_x,joff_y: Block position inside a macro block (0,8).
************************************************************************
*/
//For residual DPCM
void Inv_Residual_trans_8x8(Macroblock *currMB, ColorPlane pl, int ioff,int joff)
{
  Slice *currSlice = currMB->p_Slice;
  int i, j;
  h264_short_8x8block_t temp;

  /* For offsets in {0,8} this yields the 8x8 block index 0..3. */
  int block = (joff>>2) + (ioff>>3);

  if(currMB->ipmode_DPCM == VERT_PRED)
  {
    /* Running sum down each column, written back into mb_rres8 in place. */
    for(i=0; i<8; ++i)
    {
      temp[0][i] = currSlice->mb_rres8[pl][block][0][i];
      temp[1][i] = currSlice->mb_rres8[pl][block][1][i] + temp[0][i];
      temp[2][i] = currSlice->mb_rres8[pl][block][2][i] + temp[1][i];
      temp[3][i] = currSlice->mb_rres8[pl][block][3][i] + temp[2][i];
      temp[4][i] = currSlice->mb_rres8[pl][block][4][i] + temp[3][i];
      temp[5][i] = currSlice->mb_rres8[pl][block][5][i] + temp[4][i];
      temp[6][i] = currSlice->mb_rres8[pl][block][6][i] + temp[5][i];
      temp[7][i] = currSlice->mb_rres8[pl][block][7][i] + temp[6][i];
    }
    for(i=0; i<8; ++i)
    {
      currSlice->mb_rres8[pl][block][0][i]=temp[0][i];
      currSlice->mb_rres8[pl][block][1][i]=temp[1][i];
      currSlice->mb_rres8[pl][block][2][i]=temp[2][i];
      currSlice->mb_rres8[pl][block][3][i]=temp[3][i];
      currSlice->mb_rres8[pl][block][4][i]=temp[4][i];
      currSlice->mb_rres8[pl][block][5][i]=temp[5][i];
      currSlice->mb_rres8[pl][block][6][i]=temp[6][i];
      currSlice->mb_rres8[pl][block][7][i]=temp[7][i];
    }
  }
  else if(currMB->ipmode_DPCM == HOR_PRED)//HOR_PRED
  {
    /* Running sum along each row, written back into mb_rres8 in place. */
    for(i=0; i<8; ++i)
    {
      temp[i][0] = currSlice->mb_rres8[pl][block][i][0];
      temp[i][1] = currSlice->mb_rres8[pl][block][i][1] + temp[i][0];
      temp[i][2] = currSlice->mb_rres8[pl][block][i][2] + temp[i][1];
      temp[i][3] = currSlice->mb_rres8[pl][block][i][3] + temp[i][2];
      temp[i][4] = currSlice->mb_rres8[pl][block][i][4] + temp[i][3];
      temp[i][5] = currSlice->mb_rres8[pl][block][i][5] + temp[i][4];
      temp[i][6] = currSlice->mb_rres8[pl][block][i][6] + temp[i][5];
      temp[i][7] = currSlice->mb_rres8[pl][block][i][7] + temp[i][6];
    }
    for(i=0; i<8; ++i)
    {
      currSlice->mb_rres8[pl][block][i][0]=temp[i][0];
      currSlice->mb_rres8[pl][block][i][1]=temp[i][1];
      currSlice->mb_rres8[pl][block][i][2]=temp[i][2];
      currSlice->mb_rres8[pl][block][i][3]=temp[i][3];
      currSlice->mb_rres8[pl][block][i][4]=temp[i][4];
      currSlice->mb_rres8[pl][block][i][5]=temp[i][5];
      currSlice->mb_rres8[pl][block][i][6]=temp[i][6];
      currSlice->mb_rres8[pl][block][i][7]=temp[i][7];
    }
  }

  /* Reconstruction = residual + prediction (no clipping here). */
  for (j = 0; j < BLOCK_SIZE_8x8; ++j)
  {
    for (i = 0; i < BLOCK_SIZE_8x8; ++i)
    {
      currSlice->mb_rec[pl][joff+j][ioff+i] = (imgpel) (currSlice->mb_rres8[pl][block][j][i] + currSlice->mb_pred[pl][joff+j][ioff+i]);
    }
  }
}

/*!
 ***********************************************************************
 * \brief
 *    Luma DC inverse transform
 ***********************************************************************
 */
void itrans_2(Macroblock *currMB, ColorPlane pl)
{
  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;

  int transform_pl = IS_INDEPENDENT(p_Vid) ? PLANE_Y /*p_Vid->colour_plane_id*/ : pl;
  h264_short_block_t *blocks = currSlice->cof4[transform_pl];
  int qp_scaled = currMB->qp_scaled[pl];

  int qp_per = p_Vid->qp_per_matrix[ qp_scaled ];
  int qp_rem = p_Vid->qp_rem_matrix[ qp_scaled ];

  int invLevelScale = currSlice->InvLevelScale4x4_Intra[pl][qp_rem][0][0];
  h264_int_block_t M4;

  /* Gather the DC coefficient ([0][0]) of each of the 16 sub-blocks into a
     4x4 matrix; the index pattern follows the sub-block storage order. */
  // horizontal
  M4[0][0]=blocks[0][0][0];
  M4[0][1]=blocks[1][0][0];
  M4[0][2]=blocks[4][0][0];
  M4[0][3]=blocks[5][0][0];
  M4[1][0]=blocks[2][0][0];
  M4[1][1]=blocks[3][0][0];
  M4[1][2]=blocks[6][0][0];
  M4[1][3]=blocks[7][0][0];
  M4[2][0]=blocks[8][0][0];
  M4[2][1]=blocks[9][0][0];
  M4[2][2]=blocks[12][0][0];
  M4[2][3]=blocks[13][0][0];
  M4[3][0]=blocks[10][0][0];
  M4[3][1]=blocks[11][0][0];
  M4[3][2]=blocks[14][0][0];
  M4[3][3]=blocks[15][0][0];

  ihadamard4x4(M4);

  /* Scale each transformed DC value and scatter it back to its sub-block. */
  // vertical
  blocks[0][0][0] = rshift_rnd((( M4[0][0] * invLevelScale) << qp_per), 6);
  blocks[1][0][0] = rshift_rnd((( M4[0][1] * invLevelScale) << qp_per), 6);
  blocks[4][0][0] = rshift_rnd((( M4[0][2] * invLevelScale) << qp_per), 6);
  blocks[5][0][0] = rshift_rnd((( M4[0][3] * invLevelScale) << qp_per), 6);
  blocks[2][0][0] = rshift_rnd((( M4[1][0] * invLevelScale) << qp_per), 6);
  blocks[3][0][0] = rshift_rnd((( M4[1][1] * invLevelScale) << qp_per), 6);
  blocks[6][0][0] = rshift_rnd((( M4[1][2] * invLevelScale) << qp_per), 6);
  blocks[7][0][0] = rshift_rnd((( M4[1][3] * invLevelScale) << qp_per), 6);
  blocks[8][0][0] = rshift_rnd((( M4[2][0] * invLevelScale) << qp_per), 6);
  blocks[9][0][0] = rshift_rnd((( M4[2][1] * invLevelScale) << qp_per), 6);
  blocks[12][0][0] = rshift_rnd((( M4[2][2] * invLevelScale) << qp_per), 6);
  blocks[13][0][0] = rshift_rnd((( M4[2][3] * invLevelScale) << qp_per), 6);
  blocks[10][0][0] = rshift_rnd((( M4[3][0] * invLevelScale) << qp_per), 6);
  blocks[11][0][0] = rshift_rnd((( M4[3][1] * invLevelScale) << qp_per), 6);
  blocks[14][0][0] = rshift_rnd((( M4[3][2] * invLevelScale) << qp_per), 6);
  blocks[15][0][0] = rshift_rnd((( M4[3][3] * invLevelScale) << qp_per), 6);
}


/* SP/SI reconstruction of one 4x4 block at (ioff, joff):
   forward-transforms the prediction, combines it with the residual
   coefficients in tblock, requantises with the qs-derived parameters,
   rewrites tblock and finally runs the inverse transform into mb_rec.
   A scratch matrix is heap-allocated and freed on every call. */
void itrans_sp(h264_short_block_row_t *tblock, const h264_imgpel_macroblock_row_t *mb_pred, Macroblock *currMB, ColorPlane pl, int ioff, int joff)
{
  VideoParameters *p_Vid = currMB->p_Vid;
  Slice *currSlice = currMB->p_Slice;
  int i,j;
  int ilev, icof;

  int qp = (currSlice->slice_type == SI_SLICE) ? currSlice->qs : p_Vid->qp;
  int qp_per = p_Vid->qp_per_matrix[ qp ];
  int qp_rem = p_Vid->qp_rem_matrix[ qp ];

  int qp_per_sp = p_Vid->qp_per_matrix[ currSlice->qs ];
  int qp_rem_sp = p_Vid->qp_rem_matrix[ currSlice->qs ];
  int q_bits_sp = Q_BITS + qp_per_sp;
  /* NOTE(review): max_imgpel_value is not referenced again in this function. */
  int max_imgpel_value = p_Vid->max_pel_value_comp[pl];

  const int (*InvLevelScale4x4) [4] = dequant_coef[qp_rem];
  const int (*InvLevelScale4x4SP)[4] = dequant_coef[qp_rem_sp];
  int **PBlock;

  get_mem2Dint(&PBlock, MB_BLOCK_SIZE, MB_BLOCK_SIZE);

  /* Copy the 4x4 prediction into the top-left corner of the scratch matrix. */
  for (j=0; j< BLOCK_SIZE; ++j)
    for (i=0; i< BLOCK_SIZE; ++i)
      PBlock[j][i] = mb_pred[j+joff][i+ioff];

  forward4x4(PBlock, PBlock, 0, 0);

  if(p_Vid->sp_switch || currSlice->slice_type==SI_SLICE)
  {
    for (j=0;j<BLOCK_SIZE;++j)
    {
      for (i=0;i<BLOCK_SIZE;++i)
      {
        // recovering coefficient since they are already dequantized earlier
        icof = (tblock[j][i] >> qp_per) / InvLevelScale4x4[j][i];
        ilev = rshift_rnd_sf(iabs(PBlock[j][i]) * quant_coef[qp_rem_sp][j][i], q_bits_sp);
        ilev = isignab(ilev, PBlock[j][i]) + icof;
        tblock[j][i] = ilev * InvLevelScale4x4SP[j][i] << qp_per_sp;
      }
    }
  }
  else
  {
    for (j=0;j<BLOCK_SIZE;++j)
    {
      for (i=0;i<BLOCK_SIZE;++i)
      {
        // recovering coefficient since they are already dequantized earlier
        icof = (tblock[j][i] >> qp_per) / InvLevelScale4x4[j][i];
        ilev = PBlock[j][i] + ((icof * InvLevelScale4x4[j][i] * A[j][i] << qp_per) >> 6);
        ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][j][i], q_bits_sp);
        tblock[j][i] = ilev * InvLevelScale4x4SP[j][i] << qp_per_sp;
      }
    }
  }

  {
    h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
    opt_itrans4x4(tblock, mb_pred, mb_rec, ioff, joff);
  }

  free_mem2Dint(PBlock);
}

/* SP/SI requantisation of one chroma plane (uv = 0 or 1, stored at plane
   index uv + 1).  Moves the prediction into a scratch matrix (zeroing
   mb_pred for that plane), forward-transforms it, rebuilds the coefficients
   in cof4 and finishes with a 2x2 transform of the four DC values.
   A scratch matrix is heap-allocated and freed on every call. */
void itrans_sp_cr(Macroblock *currMB, int uv)
{
  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;
  int i,j,ilev, icof, n2,n1;
  int mp1[BLOCK_SIZE];
  int qp_per,qp_rem;
  int qp_per_sp,qp_rem_sp,q_bits_sp;
  int **PBlock;

  get_mem2Dint(&PBlock, MB_BLOCK_SIZE, MB_BLOCK_SIZE);


  /* Non-negative QPs are mapped through QP_SCALE_CR; negative ones index the matrices directly. */
  qp_per = p_Vid->qp_per_matrix[ ((p_Vid->qp < 0 ? p_Vid->qp : QP_SCALE_CR[p_Vid->qp]))];
  qp_rem = p_Vid->qp_rem_matrix[ ((p_Vid->qp < 0 ? p_Vid->qp : QP_SCALE_CR[p_Vid->qp]))];

  qp_per_sp = p_Vid->qp_per_matrix[ ((currSlice->qs < 0 ? currSlice->qs : QP_SCALE_CR[currSlice->qs]))];
  qp_rem_sp = p_Vid->qp_rem_matrix[ ((currSlice->qs < 0 ? currSlice->qs : QP_SCALE_CR[currSlice->qs]))];
  q_bits_sp = Q_BITS + qp_per_sp;

  if (currSlice->slice_type == SI_SLICE)
  {
    qp_per = qp_per_sp;
    qp_rem = qp_rem_sp;
  }

  /* Move the prediction into the scratch matrix and clear mb_pred for this plane. */
  for (j=0; j < p_Vid->mb_cr_size_y; ++j)
  {
    for (i=0; i < p_Vid->mb_cr_size_x; ++i)
    {
      PBlock[j][i] = currSlice->mb_pred[uv + 1][j][i];
      currSlice->mb_pred[uv + 1][j][i] = 0;
    }
  }

  for (n2=0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE)
  {
    for (n1=0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE)
    {
      forward4x4(PBlock, PBlock, n2, n1);
    }
  }

  // 2X2 transform of DC coeffs.
  mp1[0] = (PBlock[0][0] + PBlock[4][0] + PBlock[0][4] + PBlock[4][4]);
  mp1[1] = (PBlock[0][0] - PBlock[4][0] + PBlock[0][4] - PBlock[4][4]);
  mp1[2] = (PBlock[0][0] + PBlock[4][0] - PBlock[0][4] - PBlock[4][4]);
  mp1[3] = (PBlock[0][0] - PBlock[4][0] - PBlock[0][4] + PBlock[4][4]);

  if (p_Vid->sp_switch || currSlice->slice_type == SI_SLICE)
  {
    for (n2=0; n2 < 2; ++n2 )
    {
      for (n1=0; n1 < 2; ++n1 )
      {
        //quantization fo predicted block
        ilev = rshift_rnd_sf(iabs (mp1[n1+n2*2]) * quant_coef[qp_rem_sp][0][0], q_bits_sp + 1);
        //addition
        ilev = isignab(ilev, mp1[n1+n2*2]) + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2][n1]][0][0];
        //dequantization
        mp1[n1+n2*2] =ilev * dequant_coef[qp_rem_sp][0][0] << qp_per_sp;
      }
    }

    for (n2 = 0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE)
    {
      for (n1 = 0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE)
      {
        for (j = 0; j < BLOCK_SIZE; ++j)
        {
          for (i = 0; i < BLOCK_SIZE; ++i)
          {
            // recovering coefficient since they are already dequantized earlier
            currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = (currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] >> qp_per) / dequant_coef[qp_rem][j][i];

            //quantization of the predicted block
            ilev = rshift_rnd_sf(iabs(PBlock[n2 + j][n1 + i]) * quant_coef[qp_rem_sp][j][i], q_bits_sp);
            //addition of the residual
            ilev = isignab(ilev,PBlock[n2 + j][n1 + i]) + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] ;
            // Inverse quantization
            currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = ilev * dequant_coef[qp_rem_sp][j][i] << qp_per_sp;
          }
        }
      }
    }
  }
  else
  {
    for (n2=0; n2 < 2; ++n2 )
    {
      for (n1=0; n1 < 2; ++n1 )
      {
        ilev = mp1[n1+n2*2] + (((currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2][n1]][0][0] * dequant_coef[qp_rem][0][0] * A[0][0]) << qp_per) >> 5);
        ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][0][0], q_bits_sp + 1);
        //ilev = isignab(rshift_rnd_sf(iabs(ilev)* quant_coef[qp_rem_sp][0][0], q_bits_sp + 1), ilev);
        mp1[n1+n2*2] = ilev * dequant_coef[qp_rem_sp][0][0] << qp_per_sp;
      }
    }

    for (n2 = 0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE)
    {
      for (n1 = 0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE)
      {
        for (j = 0; j< BLOCK_SIZE; ++j)
        {
          for (i = 0; i< BLOCK_SIZE; ++i)
          {
            // recovering coefficient since they are already dequantized earlier
            icof = (currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] >> qp_per) / dequant_coef[qp_rem][j][i];
            //dequantization and addition of the predicted block
            ilev = PBlock[n2 + j][n1 + i] + ((icof * dequant_coef[qp_rem][j][i] * A[j][i] << qp_per) >> 6);
            //quantization and dequantization
            ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][j][i], q_bits_sp);
            currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = ilev * dequant_coef[qp_rem_sp][j][i] << qp_per_sp;
          }
        }
      }
    }
  }

  /* Inverse 2x2 transform of the DC values, stored into sub-blocks 0..3. */
  currSlice->cof4[uv + 1][0][0][0] = (mp1[0] + mp1[1] + mp1[2] + mp1[3]) >> 1;
  currSlice->cof4[uv + 1][1][0][0] = (mp1[0] + mp1[1] - mp1[2] - mp1[3]) >> 1;
  currSlice->cof4[uv + 1][2][0][0] = (mp1[0] - mp1[1] + mp1[2] - mp1[3]) >> 1;
  currSlice->cof4[uv + 1][3][0][0] = (mp1[0] - mp1[1] - mp1[2] + mp1[3]) >> 1;

  free_mem2Dint(PBlock);
}

/* Compiled only for debug builds or non-x86 targets.
   NOTE(review): presumably an optimised x86 version exists elsewhere - confirm. */
#if defined(_DEBUG) || !defined(_M_IX86)
/* Reconstructs all sixteen 4x4 blocks of plane pl and copies the resulting
   16x16 macroblock into the decoded picture at (pix_x, pix_y).
   smb non-zero selects the SP path; otherwise is_lossless selects between
   the residual-DPCM path and the regular inverse transform. */
void iMBtrans4x4(Macroblock *currMB, ColorPlane pl, int smb)
{
  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;

  StorablePicture *dec_picture = p_Vid->dec_picture;

  VideoImage *curr_img = pl ? dec_picture->imgUV[pl - 1]: dec_picture->imgY;

  // =============== 4x4 itrans ================
  // -------------------------------------------
  if (smb)
  {
    h264_short_block_t *blocks = currSlice->cof4[pl];
    const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];

    itrans_sp(blocks[0], mb_pred, currMB, pl, 0, 0);
    itrans_sp(blocks[1], mb_pred, currMB, pl, 4, 0);
    itrans_sp(blocks[2], mb_pred, currMB, pl, 0, 4);
    itrans_sp(blocks[3], mb_pred, currMB, pl, 4, 4);
    itrans_sp(blocks[4], mb_pred, currMB, pl, 8, 0);
    itrans_sp(blocks[5], mb_pred, currMB, pl, 12, 0);
    itrans_sp(blocks[6], mb_pred, currMB, pl, 8, 4);
    itrans_sp(blocks[7], mb_pred, currMB, pl, 12, 4);
    itrans_sp(blocks[8], mb_pred, currMB, pl, 0, 8);
    itrans_sp(blocks[9], mb_pred, currMB, pl, 4, 8);
    itrans_sp(blocks[10], mb_pred, currMB, pl, 0, 12);
    itrans_sp(blocks[11], mb_pred, currMB, pl, 4, 12);
    itrans_sp(blocks[12], mb_pred, currMB, pl, 8, 8);
    itrans_sp(blocks[13], mb_pred, currMB, pl, 12, 8);
    itrans_sp(blocks[14], mb_pred, currMB, pl, 8, 12);
    itrans_sp(blocks[15], mb_pred, currMB, pl, 12, 12);
  }
  else if (currMB->is_lossless)
  {
    Inv_Residual_trans_4x4(currMB, pl, 0, 0);
    Inv_Residual_trans_4x4(currMB, pl, 4, 0);
    Inv_Residual_trans_4x4(currMB, pl, 0, 4);
    Inv_Residual_trans_4x4(currMB, pl, 4, 4);
    Inv_Residual_trans_4x4(currMB, pl, 8, 0);
    Inv_Residual_trans_4x4(currMB, pl, 12, 0);
    Inv_Residual_trans_4x4(currMB, pl, 8, 4);
    Inv_Residual_trans_4x4(currMB, pl, 12, 4);
    Inv_Residual_trans_4x4(currMB, pl, 0, 8);
    Inv_Residual_trans_4x4(currMB, pl, 4, 8);
    Inv_Residual_trans_4x4(currMB, pl, 0, 12);
    Inv_Residual_trans_4x4(currMB, pl, 4, 12);
    Inv_Residual_trans_4x4(currMB, pl, 8, 8);
    Inv_Residual_trans_4x4(currMB, pl, 12, 8);
    Inv_Residual_trans_4x4(currMB, pl, 8, 12);
    Inv_Residual_trans_4x4(currMB, pl, 12, 12);
  }
  else
  {
    const h264_short_block_t *blocks = currSlice->cof4[pl];
    const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];
    h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];

    opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
    opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
    opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
    opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
    opt_itrans4x4(blocks[4], mb_pred, mb_rec, 8, 0);
    opt_itrans4x4(blocks[5], mb_pred, mb_rec, 12, 0);
    opt_itrans4x4(blocks[6], mb_pred, mb_rec, 8, 4);
    opt_itrans4x4(blocks[7], mb_pred, mb_rec, 12, 4);
    opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8);
    opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8);
    opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12);
    opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12);
    opt_itrans4x4(blocks[12], mb_pred, mb_rec, 8, 8);
    opt_itrans4x4(blocks[13], mb_pred, mb_rec, 12, 8);
    opt_itrans4x4(blocks[14], mb_pred, mb_rec, 8, 12);
    opt_itrans4x4(blocks[15], mb_pred, mb_rec, 12, 12);
  }

  // construct picture from 4x4 blocks
  opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, currSlice->mb_rec[pl]);
}
#endif
/* Reconstructs the four 8x8 blocks of plane pl (regular or lossless inverse
   transform, chosen by is_lossless) and copies the 16x16 result into the
   decoded picture at (pix_x, pix_y). */
void iMBtrans8x8(Macroblock *currMB, ColorPlane pl)
{
  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;
  StorablePicture *dec_picture = p_Vid->dec_picture;
  VideoImage *curr_img = pl ? dec_picture->imgUV[pl - 1] : dec_picture->imgY;

  h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
  h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[pl];
  h264_short_8x8block_t *mb_rres8 = currSlice->mb_rres8[pl];

  /* The lower two blocks are addressed by advancing the row pointers by 8;
     the last argument is the horizontal offset. */
  if (currMB->is_lossless == FALSE)
  {
    opt_itrans8x8(mb_rec, mb_pred, mb_rres8[0], 0);
    opt_itrans8x8(mb_rec, mb_pred, mb_rres8[1], 8);
    opt_itrans8x8(mb_rec+8, mb_pred+8, mb_rres8[2], 0);
    opt_itrans8x8(mb_rec+8, mb_pred+8, mb_rres8[3], 8);
  }
  else
  {
    itrans8x8_lossless(mb_rec, mb_pred, mb_rres8[0], 0);
    itrans8x8_lossless(mb_rec, mb_pred, mb_rres8[1], 8);
    itrans8x8_lossless(mb_rec+8, mb_pred+8, mb_rres8[2], 0);
    itrans8x8_lossless(mb_rec+8, mb_pred+8, mb_rres8[3], 8);
  }

  opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, mb_rec);
}

/* Top-level inverse transform for one macroblock.
   Plane pl is reconstructed with the 4x4 or 8x8 path when any of the low
   four cbp bits is set or smb is non-zero; otherwise the prediction is
   copied straight into the picture.  For YUV420 and YUV422 pictures both
   chroma planes are then processed as well. */
void iTransform(Macroblock *currMB, ColorPlane pl, int smb)
{
  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;
  StorablePicture *dec_picture = p_Vid->dec_picture;

  int uv = pl-1;

  if ((currMB->cbp & 15) != 0 || smb)
  {
    if(currMB->luma_transform_size_8x8_flag == 0) // 4x4 inverse transform
    {
      iMBtrans4x4(currMB, pl, smb);
    }
    else // 8x8 inverse transform
    {
      iMBtrans8x8(currMB, pl);
    }
  }
  else
  {
    VideoImage *curr_img = pl ? dec_picture->imgUV[uv] : dec_picture->imgY;
    opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[pl]);
  }
// TODO: fix 4x4 lossless
  if (dec_picture->chroma_format_idc == YUV420)
  {
    VideoImage *curUV;

    for(uv=0;uv<2;++uv)
    {
      /* Shadows the pl parameter: chroma plane index for this iteration. */
      int pl = uv + 1;

      const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];

      // =============== 4x4 itrans ================
      // -------------------------------------------
      curUV = dec_picture->imgUV[uv];

      if (!smb && (currMB->cbp>>4))
      {
        if (currMB->is_lossless == FALSE)
        {
          const h264_short_block_t *blocks = currSlice->cof4[pl];
          h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];

          opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
          opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
          opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
          opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
          copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec);
        }
        else
        { // lossless
          const h264_short_block_t *blocks = currSlice->cof4[pl];
          h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];

          itrans4x4_ls(blocks[0], mb_pred, mb_rec, 0, 0);
          itrans4x4_ls(blocks[1], mb_pred, mb_rec, 4, 0);
          itrans4x4_ls(blocks[2], mb_pred, mb_rec, 0, 4);
          itrans4x4_ls(blocks[3], mb_pred, mb_rec, 4, 4);
          copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec);
        }
      }
      else if (smb)
      {
        const h264_short_block_t *blocks = currSlice->cof4[pl];
        h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];

        itrans_sp_cr(currMB, uv);

        opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
        opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
        opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
        opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);

        copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec);
      }
      else
      {
        /* No chroma residual: the prediction is the reconstruction. */
        copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_pred);
      }
    }
  }
  else if (dec_picture->chroma_format_idc == YUV422)
  {
    VideoImage *curUV;

    for(uv=0;uv<2;++uv)
    {
      // =============== 4x4 itrans ================
      // -------------------------------------------
      /* Shadows the pl parameter: chroma plane index for this iteration. */
      int pl = uv + 1;
      const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];
      curUV = dec_picture->imgUV[uv];

      if (!smb && (currMB->cbp>>4))
      {
        h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
        const h264_short_block_t *blocks = currSlice->cof4[pl];

        /* 8 wide by 16 tall: sub-blocks 0-3 and 8-11 are used. */
        opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
        opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
        opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
        opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
        opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8);
        opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8);
        opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12);
        opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12);

        copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec, 8, 16);
      }
      else if (smb)
      {
        const h264_short_block_t *blocks = currSlice->cof4[pl];
        h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];

        itrans_sp_cr(currMB, uv);

        opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
        opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
        opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
        opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
        opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8);
        opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8);
        opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12);
        opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12);

        copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec, 8, 16);
      }
      else
      {
        copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_pred, 8, 16);
      }
    }
  }
}

/*!
 *************************************************************************************
 * \brief
 *    Copy ImgPel Data from one structure to another (16x16)
 *************************************************************************************
 */
void copy_image_data_16x16(imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x)
{
  int j;
  /* Row-pointer version: copies MB_BLOCK_SIZE pixels from each of the first
     MB_BLOCK_SIZE rows of imgBuf2 (at src_x) into imgBuf1 (at dest_x). */
  for(j=0; j<MB_BLOCK_SIZE; ++j)
  {
    memcpy(&imgBuf1[j][dest_x], &imgBuf2[j][src_x], MB_BLOCK_SIZE * sizeof (imgpel));
  }
}

/*!
 *************************************************************************************
 * \brief
 *    Copy ImgPel Data from one structure to another (16x16)
 *************************************************************************************
 */
#ifdef _M_IX86
/* Inline-assembly variant for 32-bit x86: loads each 16-byte source row with
   movaps (so source must be 16-byte aligned) and stores it with movups to
   destination row dest_y + k at column dest_x.  Source rows are 16 bytes
   apart; destination rows are destination->stride bytes apart. */
void copy_image_data_16x16_stride_sse(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source)
{
  ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
  imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
  _asm
  {
    mov eax, dest
    mov ecx, destination_stride
    mov edx, source;
    movaps xmm0, 0[edx]
    movaps xmm1, 16[edx]
    movaps xmm2, 32[edx]
    movaps xmm3, 64[edx]
    movups [eax], xmm0 // dest[0]
    movups [eax+ecx], xmm1 // dest[1]
    movups [eax+2*ecx], xmm2 // dest[2]
    movups [eax+4*ecx], xmm3 // dest[4]

    movaps xmm0, 48[edx]
    movaps xmm1, 96[edx]
    lea eax, [eax+2*ecx] // dest = &dest[2]
    movups [eax+ecx], xmm0 // dest[3]
    movups [eax+4*ecx], xmm1 // dest[6]

    movaps xmm0, 80[edx]
    movaps xmm1, 128[edx]
    lea eax, [eax+2*ecx] // dest = &dest[2] (dest[4] from start)
    movups [eax+ecx], xmm0 // dest[5]
    movups [eax+4*ecx], xmm1 // dest[8]

    movaps xmm0, 112[edx]
    movaps xmm1, 160[edx]
    lea eax, [eax+2*ecx] // dest = &dest[2] (dest[6] from start)
    movups [eax+ecx], xmm0 // dest[7]
    movups [eax+4*ecx], xmm1 // dest[10]

    movaps xmm0, 144[edx]
    movaps xmm1, 192[edx]
    lea eax, [eax+2*ecx] // dest = &dest[2] (dest[8] from start)
    movups [eax+ecx], xmm0 // dest[9]
    movups [eax+4*ecx], xmm1 // dest[12]

    movaps xmm0, 176[edx]
    movaps xmm1, 224[edx]
    lea eax, [eax+2*ecx] // dest = &dest[2] (dest[10] from start)
    movups [eax+ecx], xmm0 // dest[11]
    movups [eax+4*ecx], xmm1 // dest[14]

    movaps xmm0, 208[edx]
    movaps xmm1, 240[edx]
    lea eax, [eax+ecx] // dest = &dest[1] (dest[11] from start)
    movups [eax+2*ecx], xmm0 // dest[13]
    movups [eax+4*ecx], xmm1 // dest[15]
  }
}
#endif

/* Portable variant: copies the 16 rows of source into destination starting
   at (dest_x, dest_y), one memcpy per row. */
void copy_image_data_16x16_stride_c(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source)
{
  ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
  imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;

  int j;
  for(j=0; j<MB_BLOCK_SIZE; j++)
  {
    memcpy(dest, source[j], MB_BLOCK_SIZE * sizeof (imgpel));
    dest+=destination_stride;
  }
}

/*!
 *************************************************************************************
 * \brief
 *    Copy ImgPel Data from one structure to another (8x8)
 *************************************************************************************
 */
/* Copies the 8x8 block at (src_x, src_y) of imgBuf2 to (dest_x, dest_y) of
   destination.  The x86 path uses MMX loads and non-temporal movntq stores.
   NOTE(review): the asm issues no emms/sfence itself - presumably handled by
   the caller; confirm. */
void copy_image_data_8x8_stride2(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2, int src_x, int src_y)
{
#ifdef _M_IX86
  ptrdiff_t destination_stride = destination->stride;
  imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
  _asm
  {
    /* eax = imgBuf2 + src_y*16 + src_x (source rows are 16 bytes apart) */
    mov eax, src_y
    shl eax, 4
    add eax, src_x
    add eax, imgBuf2

    mov edx, dest
    mov ecx, destination_stride

    movq mm0, MMWORD PTR 0[eax]
    movq mm1, MMWORD PTR 16[eax]
    movq mm2, MMWORD PTR 32[eax]
    movq mm3, MMWORD PTR 48[eax]
    movq mm4, MMWORD PTR 64[eax]
    movq mm5, MMWORD PTR 80[eax]
    movq mm6, MMWORD PTR 96[eax]
    movq mm7, MMWORD PTR 112[eax]

    /* Rows 0,1,2,4 first; edx then steps one row at a time to reach 3,5,6,7. */
    movntq [edx], mm0
    movntq [edx+ecx], mm1
    movntq [edx+2*ecx], mm2
    movntq [edx+4*ecx], mm4
    add edx, ecx
    movntq 0[edx+2*ecx], mm3
    movntq 0[edx+4*ecx], mm5
    add edx, ecx
    movntq 0[edx+4*ecx], mm6
    add edx, ecx
    movntq 0[edx+4*ecx], mm7
  }
#else
  ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
  imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
  int j;
  for(j = 0; j < BLOCK_SIZE_8x8; ++j)
  {
    memcpy(dest, &imgBuf2[src_y+j][src_x], BLOCK_SIZE_8x8 * sizeof (imgpel));
    dest+=destination_stride;
  }
#endif

}

/* Same as copy_image_data_8x8_stride2 but always reads the top-left 8x8
   block of imgBuf2. */
void copy_image_data_8x8_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2)
{
#ifdef _M_IX86
  ptrdiff_t destination_stride = destination->stride;
  imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
  _asm
  {
    mov eax, imgBuf2
    mov edx, dest
    mov ecx, destination_stride

    movq mm0, MMWORD PTR 0[eax]
    movq mm1, MMWORD PTR 16[eax]
    movq mm2, MMWORD PTR 32[eax]
    movq mm3, MMWORD PTR 48[eax]
    movq mm4, MMWORD PTR 64[eax]
    movq mm5, MMWORD PTR 80[eax]
    movq mm6, MMWORD PTR 96[eax]
    movq mm7, MMWORD PTR 112[eax]

    /* Rows 0,1,2,4 first; edx then steps one row at a time to reach 3,5,6,7. */
    movntq [edx], mm0
    movntq [edx+ecx], mm1
    movntq [edx+2*ecx], mm2
    movntq [edx+4*ecx], mm4
    add edx, ecx
    movntq 0[edx+2*ecx], mm3
    movntq 0[edx+4*ecx], mm5
    add edx, ecx
    movntq 0[edx+4*ecx], mm6
    add edx, ecx
    movntq 0[edx+4*ecx], mm7
  }
#else
  ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
  imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
  int j;
  for(j = 0; j < BLOCK_SIZE_8x8; ++j)
  {
    memcpy(dest, &imgBuf2[j][0], BLOCK_SIZE_8x8 * sizeof (imgpel));
    dest+=destination_stride;
  }
#endif
}

/*!
+ ************************************************************************************* + * \brief + * Copy ImgPel Data from one structure to another (4x4) + ************************************************************************************* + */ + +void copy_image_data_4x4_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int src_x, int src_y) +{ + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + h264_imgpel_macroblock_row_t *src = (h264_imgpel_macroblock_row_t *)source[src_y]; /* cast is for const */ + + int j; + for(j = 0; j < BLOCK_SIZE; ++j) + { + memcpy(dest, &src[j][src_x], BLOCK_SIZE * sizeof (imgpel)); + dest+=destination_stride; + } +} + +/*! + ************************************************************************************* + * \brief + * Copy ImgPel Data from one structure to another (8x8) + ************************************************************************************* + */ +void copy_image_data(imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x, int width, int height) +{ + int j; + for(j = 0; j < height; ++j) + { + memcpy(&imgBuf1[j][dest_x], &imgBuf2[j][src_x], width * sizeof (imgpel)); + } +} + +void copy_image_data_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2, int width, int height) +{ + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + #ifdef H264_IPP + IppiSize roi = {width,height}; + ippiCopy_8u_C1R(imgBuf2[0], sizeof(imgBuf2[0]), dest, destination_stride, roi); +#else + int j; + for(j = 0; j < height; ++j) + { + memcpy(dest, imgBuf2[j], width * sizeof (imgpel)); + dest+=destination_stride; + } +#endif +} diff --git a/Src/h264dec/ldecod/src/cabac.c 
b/Src/h264dec/ldecod/src/cabac.c new file mode 100644 index 00000000..a3c43513 --- /dev/null +++ b/Src/h264dec/ldecod/src/cabac.c @@ -0,0 +1,2123 @@ +/*! +************************************************************************************* +* \file cabac.c +* +* \brief +* CABAC entropy coding routines +* +* \author +* Main contributors (see contributors.h for copyright, address and affiliation details) +* - Detlev Marpe <marpe@hhi.de> +************************************************************************************** +*/ + +#include "global.h" +#include "cabac.h" +#include "memalloc.h" +#include "elements.h" +#include "image.h" +#include "biaridecod.h" +#include "mb_access.h" +#include "vlc.h" +#include <mmintrin.h> +#define get_bit(x, n) (_mm_cvtsi64_si32(_mm_srli_si64(*(__m64 *)&(x), n)) & 1) +/*static inline int get_bit(int64 x,int n) +{ +return (int)(((x >> n) & 1)); +}*/ + +static __forceinline void or_bits_low(int64 *x, int mask, int position) +{ + *(int32_t *)x |= (mask << position); +} + +static inline void or_bits(int64 *x, int mask, int position) +{ +#ifdef _M_IX86 + __m64 mmx_x = *(__m64 *)x; + __m64 mmx_mask = _mm_cvtsi32_si64(mask); + mmx_mask=_mm_slli_si64(mmx_mask, position); + mmx_x = _mm_or_si64(mmx_x, mmx_mask); + *(__m64 *)x = mmx_x; +#else + *x |= ((int64) mask << position); +#endif +} +#if TRACE +int symbolCount = 0; +#endif + +/*********************************************************************** +* L O C A L L Y D E F I N E D F U N C T I O N P R O T O T Y P E S +*********************************************************************** +*/ +static unsigned int unary_bin_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + int ctx_offset); +static unsigned int unary_bin_max_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + int ctx_offset, + unsigned int max_symbol); + +unsigned int unary_exp_golomb_mv_decode(DecodingEnvironmentPtr dep_dp, BiContextTypePtr ctx, unsigned int max_bin); +unsigned int 
unary_exp_golomb_mv_decode3(DecodingEnvironmentPtr dep_dp, BiContextTypePtr ctx); + +void CheckAvailabilityOfNeighborsCABAC(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + PixelPos up, left; + + p_Vid->getNeighbourLeftLuma(currMB, &left); + p_Vid->getNeighbourUpLuma(currMB, &up); + + if (up.available) + currMB->mb_up = &p_Vid->mb_data[up.mb_addr]; + else + currMB->mb_up = NULL; + + if (left.available) + currMB->mb_left = &p_Vid->mb_data[left.mb_addr]; + else + currMB->mb_left = NULL; +} + +void cabac_new_slice(Slice *currSlice) +{ + currSlice->last_dquant=0; +} + +/*! +************************************************************************ +* \brief +* Allocation of contexts models for the motion info +* used for arithmetic decoding +* +************************************************************************ +*/ +MotionInfoContexts* create_contexts_MotionInfo(void) +{ + MotionInfoContexts *deco_ctx; + + deco_ctx = (MotionInfoContexts*) calloc(1, sizeof(MotionInfoContexts) ); + if( deco_ctx == NULL ) + no_mem_exit("create_contexts_MotionInfo: deco_ctx"); + + return deco_ctx; +} + + +/*! +************************************************************************ +* \brief +* Allocates of contexts models for the texture info +* used for arithmetic decoding +************************************************************************ +*/ +TextureInfoContexts* create_contexts_TextureInfo(void) +{ + TextureInfoContexts *deco_ctx; + + deco_ctx = (TextureInfoContexts*) calloc(1, sizeof(TextureInfoContexts) ); + if( deco_ctx == NULL ) + no_mem_exit("create_contexts_TextureInfo: deco_ctx"); + + return deco_ctx; +} + + +/*! +************************************************************************ +* \brief +* Frees the memory of the contexts models +* used for arithmetic decoding of the motion info. 
+************************************************************************ +*/ +void delete_contexts_MotionInfo(MotionInfoContexts *deco_ctx) +{ + if( deco_ctx == NULL ) + return; + + free( deco_ctx ); +} + + +/*! +************************************************************************ +* \brief +* Frees the memory of the contexts models +* used for arithmetic decoding of the texture info. +************************************************************************ +*/ +void delete_contexts_TextureInfo(TextureInfoContexts *deco_ctx) +{ + if( deco_ctx == NULL ) + return; + + free( deco_ctx ); +} + +Boolean readFieldModeInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + MotionInfoContexts *ctx = currSlice->mot_ctx; + int a = currMB->mb_avail_left ? p_Vid->mb_data[currMB->mb_addr_left].mb_field : 0; + int b = currMB->mb_avail_up ? p_Vid->mb_data[currMB->mb_addr_up].mb_field : 0; + int act_ctx = a + b; + + return biari_decode_symbol (dep_dp, &ctx->mb_aff_contexts[act_ctx]); +} + + +int check_next_mb_and_get_field_mode_CABAC(Slice *currSlice, DataPartition *act_dp) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + BiContextTypePtr mb_type_ctx_copy[3]; + BiContextTypePtr mb_aff_ctx_copy; + DecodingEnvironmentPtr dep_dp_copy; + + int length; + DecodingEnvironmentPtr dep_dp = &(act_dp->de_cabac); + + int bframe = (currSlice->slice_type == B_SLICE); + int skip = 0; + int field = 0; + int i; + + Macroblock *currMB; + + //get next MB + ++p_Vid->current_mb_nr; + + currMB = &p_Vid->mb_data[p_Vid->current_mb_nr]; + currMB->p_Vid = p_Vid; + currMB->p_Slice = currSlice; + currMB->slice_nr = p_Vid->current_slice_nr; + currMB->mb_field = p_Vid->mb_data[p_Vid->current_mb_nr-1].mb_field; + currMB->mbAddrX = p_Vid->current_mb_nr; + + CheckAvailabilityOfNeighbors(currMB); + CheckAvailabilityOfNeighborsCABAC(currMB); + + //create + dep_dp_copy = (DecodingEnvironmentPtr) calloc(1, 
sizeof(DecodingEnvironment) ); + for (i=0;i<3;++i) + mb_type_ctx_copy[i] = (BiContextTypePtr) calloc(NUM_MB_TYPE_CTX, sizeof(BiContextType) ); + mb_aff_ctx_copy = (BiContextTypePtr) calloc(NUM_MB_AFF_CTX, sizeof(BiContextType) ); + + //copy + memcpy(dep_dp_copy,dep_dp,sizeof(DecodingEnvironment)); + length = *(dep_dp_copy->Dcodestrm_len) = *(dep_dp->Dcodestrm_len); + for (i=0;i<3;++i) + memcpy(mb_type_ctx_copy[i], currSlice->mot_ctx->mb_type_contexts[i],NUM_MB_TYPE_CTX*sizeof(BiContextType) ); + memcpy(mb_aff_ctx_copy, currSlice->mot_ctx->mb_aff_contexts,NUM_MB_AFF_CTX*sizeof(BiContextType) ); + + //check_next_mb + currSlice->last_dquant = 0; + skip = readMB_skip_flagInfo_CABAC(currMB, dep_dp); + + if (!skip) + { + field = readFieldModeInfo_CABAC(currMB, dep_dp); + p_Vid->mb_data[p_Vid->current_mb_nr-1].mb_field = field; + } + + //reset + p_Vid->current_mb_nr--; + + memcpy(dep_dp,dep_dp_copy,sizeof(DecodingEnvironment)); + *(dep_dp->Dcodestrm_len) = length; + for (i=0;i<3;++i) + memcpy(currSlice->mot_ctx->mb_type_contexts[i],mb_type_ctx_copy[i], NUM_MB_TYPE_CTX*sizeof(BiContextType) ); + memcpy( currSlice->mot_ctx->mb_aff_contexts,mb_aff_ctx_copy,NUM_MB_AFF_CTX*sizeof(BiContextType) ); + + CheckAvailabilityOfNeighborsCABAC(currMB); + + //delete + free(dep_dp_copy); + for (i=0;i<3;++i) + free(mb_type_ctx_copy[i]); + free(mb_aff_ctx_copy); + + return skip; +} + + + + +/*! +************************************************************************ +* \brief +* This function is used to arithmetically decode the motion +* vector data of a B-frame MB. 
+************************************************************************ +*/ +#if defined(_DEBUG) || !defined(_M_IX86) +int decodeMVD_CABAC(DecodingEnvironmentPtr dep_dp, BiContextType mv_ctx[2][NUM_MV_RES_CTX], int act_ctx, int err) +{ + int act_sym = biari_decode_symbol(dep_dp,&mv_ctx[0][act_ctx+err] ); + + if (act_sym != 0) + { + int mv_sign; + act_sym = unary_exp_golomb_mv_decode3(dep_dp,mv_ctx[1]+act_ctx); + ++act_sym; + mv_sign = biari_decode_symbol_eq_prob(dep_dp); + + if(mv_sign) + act_sym = -act_sym; + } + return act_sym; +} +#else +int decodeMVD_CABAC(DecodingEnvironmentPtr dep_dp, BiContextType mv_ctx[2][NUM_MV_RES_CTX], int act_ctx, int err); +#endif + +int readMVD_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int k, int list_idx, int x, int y) +{ + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + MotionInfoContexts *ctx = currSlice->mot_ctx; + int a = 0, b = 0; +// int act_ctx; +// int act_sym; + int mv_local_err; + int err; + + PixelPos block_a, block_b; + + p_Vid->getNeighbourPXLumaNB_NoPos(currMB, y - 1, &block_b); + if (block_b.available) + { + b = abs(p_Vid->mb_data[block_b.mb_addr].mvd[list_idx][block_b.y>>2][x>>2][k]); + if (currSlice->mb_aff_frame_flag && (k==1)) + { + if ((currMB->mb_field==0) && (p_Vid->mb_data[block_b.mb_addr].mb_field==1)) + b *= 2; + else if ((currMB->mb_field==1) && (p_Vid->mb_data[block_b.mb_addr].mb_field==0)) + b /= 2; + } + } + + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, x - 1, y , &block_a); + if (block_a.available) + { + a = abs(p_Vid->mb_data[block_a.mb_addr].mvd[list_idx][block_a.y>>2][block_a.x>>2][k]); + if (currSlice->mb_aff_frame_flag && (k==1)) + { + if ((currMB->mb_field==0) && (p_Vid->mb_data[block_a.mb_addr].mb_field==1)) + a *= 2; + else if ((currMB->mb_field==1) && (p_Vid->mb_data[block_a.mb_addr].mb_field==0)) + a /= 2; + } + } + + if ((mv_local_err = a + b)<3) + err = 0; + else + { + if (mv_local_err > 32) + err = 3; + else + err = 2; + } + + return 
decodeMVD_CABAC(dep_dp, ctx->mv_res_contexts, 5*k, err); + /* + act_sym = biari_decode_symbol(dep_dp,&ctx->mv_res_contexts[0][act_ctx] ); + + if (act_sym != 0) + { + int mv_sign; + act_ctx = 5 * k; + act_sym = unary_exp_golomb_mv_decode3(dep_dp,ctx->mv_res_contexts[1]+act_ctx); + ++act_sym; + mv_sign = biari_decode_symbol_eq_prob(dep_dp); + + if(mv_sign) + act_sym = -act_sym; + } + return act_sym; + */ +} + + +/*! +************************************************************************ +* \brief +* This function is used to arithmetically decode the 8x8 block type. +************************************************************************ +*/ +int readB8_typeInfo_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp) +{ + int act_sym = 0; + int bframe = (currSlice->slice_type == B_SLICE); + + MotionInfoContexts *ctx = currSlice->mot_ctx; + + + if (!bframe) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[0][1])) + { + act_sym = 0; + } + else + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[0][3])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[0][4])) act_sym = 2; + else act_sym = 3; + } + else + { + act_sym = 1; + } + } + } + else + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][0])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][1])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][2])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) + { + act_sym = 10; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym++; + } + else + { + act_sym = 6; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym+=2; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym++; + } + } + else + { + act_sym=2; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym+=2; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym+=1; + } + } + else + { + if (biari_decode_symbol (dep_dp, 
&ctx->b8_type_contexts[1][3])) act_sym = 1; + else act_sym = 0; + } + ++act_sym; + } + else + { + act_sym= 0; + } + } + return act_sym; +} + +/*! +************************************************************************ +* \brief +* This function is used to arithmetically decode the macroblock +* type info of a given MB. +************************************************************************ +*/ +#if defined(_DEBUG) || !defined(_M_IX86) +int readMB_skip_flagInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp) +{ + Slice *currSlice = currMB->p_Slice; + int bframe=(currSlice->slice_type == B_SLICE); + MotionInfoContexts *ctx = currSlice->mot_ctx; + int a = (currMB->mb_left != NULL) ? (currMB->mb_left->skip_flag == 0) : 0; + int b = (currMB->mb_up != NULL) ? (currMB->mb_up ->skip_flag == 0) : 0; + int act_ctx; + int skip; + + if (bframe) + { + act_ctx = 7 + a + b; + + skip = biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][act_ctx]); + } + else + { + act_ctx = a + b; + + skip = biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][act_ctx]); + } + + if (skip) + { + currSlice->last_dquant = 0; + } + return skip; +} +#endif + +/*! +*************************************************************************** +* \brief +* This function is used to arithmetically decode the macroblock +* intra_pred_size flag info of a given MB. +*************************************************************************** +*/ + +Boolean readMB_transform_size_flag_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp) +{ + Slice *currSlice = currMB->p_Slice; + TextureInfoContexts*ctx = currSlice->tex_ctx; + + int b = (currMB->mb_up == NULL) ? 0 : currMB->mb_up->luma_transform_size_8x8_flag; + int a = (currMB->mb_left == NULL) ? 0 : currMB->mb_left->luma_transform_size_8x8_flag; + + int act_ctx = a + b; + int act_sym = biari_decode_symbol(dep_dp, ctx->transform_size_contexts + act_ctx); + + return act_sym; +} + +/*! 
+************************************************************************ +* \brief +* This function is used to arithmetically decode the macroblock +* type info of a given MB. +************************************************************************ +*/ +int readMB_typeInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp) +{ + Slice *currSlice = currMB->p_Slice; + MotionInfoContexts *ctx = currSlice->mot_ctx; + + int a = 0, b = 0; + int act_ctx; + int act_sym; + int bframe=(currSlice->slice_type == B_SLICE); + int mode_sym; + int curr_mb_type; + + if(currSlice->slice_type == I_SLICE) // INTRA-frame + { + if (currMB->mb_up != NULL) + b = (((currMB->mb_up)->mb_type != I4MB && currMB->mb_up->mb_type != I8MB) ? 1 : 0 ); + + if (currMB->mb_left != NULL) + a = (((currMB->mb_left)->mb_type != I4MB && currMB->mb_left->mb_type != I8MB) ? 1 : 0 ); + + act_ctx = a + b; + act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx); + + if (act_sym==0) // 4x4 Intra + { + curr_mb_type = act_sym; + } + else // 16x16 Intra + { + mode_sym = biari_decode_final(dep_dp); + if(mode_sym == 1) + { + curr_mb_type = 25; + } + else + { + act_sym = 1; + act_ctx = 4; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); // decoding of AC/no AC + act_sym += mode_sym*12; + act_ctx = 5; + // decoding of cbp: 0,1,2 + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + if (mode_sym!=0) + { + act_ctx=6; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym+=4; + if (mode_sym!=0) + act_sym+=4; + } + // decoding of I pred-mode: 0,1,2,3 + act_ctx = 7; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym*2; + act_ctx = 8; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym; + curr_mb_type = act_sym; + } + } + } + else if(currSlice->slice_type == SI_SLICE) // SI-frame + { + // special ctx's for SI4MB 
+ if (currMB->mb_up != NULL) + b = (( (currMB->mb_up)->mb_type != SI4MB) ? 1 : 0 ); + + if (currMB->mb_left != NULL) + a = (( (currMB->mb_left)->mb_type != SI4MB) ? 1 : 0 ); + + act_ctx = a + b; + act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx); + + if (act_sym==0) // SI 4x4 Intra + { + curr_mb_type = 0; + } + else // analog INTRA_IMG + { + if (currMB->mb_up != NULL) + b = (( (currMB->mb_up)->mb_type != I4MB) ? 1 : 0 ); + + if (currMB->mb_left != NULL) + a = (( (currMB->mb_left)->mb_type != I4MB) ? 1 : 0 ); + + act_ctx = a + b; + act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx); + + if (act_sym==0) // 4x4 Intra + { + curr_mb_type = 1; + } + else // 16x16 Intra + { + mode_sym = biari_decode_final(dep_dp); + if( mode_sym==1 ) + { + curr_mb_type = 26; + } + else + { + act_sym = 2; + act_ctx = 4; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); // decoding of AC/no AC + act_sym += mode_sym*12; + act_ctx = 5; + // decoding of cbp: 0,1,2 + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + if (mode_sym!=0) + { + act_ctx=6; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym+=4; + if (mode_sym!=0) + act_sym+=4; + } + // decoding of I pred-mode: 0,1,2,3 + act_ctx = 7; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym*2; + act_ctx = 8; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym; + curr_mb_type = act_sym; + } + } + } + } + else + { + if (bframe) + { + if (currMB->mb_up != NULL) + b = (( (currMB->mb_up)->mb_type != 0) ? 1 : 0 ); + + if (currMB->mb_left != NULL) + a = (( (currMB->mb_left)->mb_type != 0) ? 
1 : 0 ); + + act_ctx = a + b; + + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][act_ctx])) + { + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][4])) + { + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][5])) + { + act_sym=12; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=8; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=4; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=2; + + if (act_sym==24) act_sym=11; + else if (act_sym==26) act_sym=22; + else + { + if (act_sym==22) act_sym=23; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=1; + } + } + else + { + act_sym=3; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=4; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=2; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=1; + } + } + else + { + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym=2; + else act_sym=1; + } + } + else + { + act_sym = 0; + } + } + else // P-frame + { + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][4] )) + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][7] )) act_sym = 7; + else act_sym = 6; + } + else + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][5] )) + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][7] )) act_sym = 2; + else act_sym = 3; + } + else + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][6] )) act_sym = 4; + else act_sym = 1; + } + } + } + } + + if (act_sym<=6 || (((currSlice->slice_type == B_SLICE) ? 
1 : 0) && act_sym<=23)) + { + curr_mb_type = act_sym; + } + else // additional info for 16x16 Intra-mode + { + mode_sym = biari_decode_final(dep_dp); + if( mode_sym==1 ) + { + if(bframe) // B frame + curr_mb_type = 48; + else // P frame + curr_mb_type = 31; + } + else + { + act_ctx = 8; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); // decoding of AC/no AC + act_sym += mode_sym*12; + + // decoding of cbp: 0,1,2 + act_ctx = 9; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + if (mode_sym != 0) + { + act_sym+=4; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + if (mode_sym != 0) + act_sym+=4; + } + + // decoding of I pred-mode: 0,1,2,3 + act_ctx = 10; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + act_sym += mode_sym*2; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + act_sym += mode_sym; + curr_mb_type = act_sym; + } + } + } + return curr_mb_type; +} + +/*! +************************************************************************ +* \brief +* This function is used to arithmetically decode a pair of +* intra prediction modes of a given MB. +************************************************************************ +*/ +#if defined(_DEBUG) || !defined(_M_IX86) +int readIntraPredMode_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp) +{ + TextureInfoContexts *ctx = currSlice->tex_ctx; + int act_sym; + + // use_most_probable_mode + act_sym = biari_decode_symbol(dep_dp, ctx->ipr_contexts); + + // remaining_mode_selector + if (act_sym == 1) + { + return -1; + } + else + { + int pred_mode=0; + pred_mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) ); + pred_mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 1); + pred_mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 2); + return pred_mode; + } +} +#endif +/*! 
+************************************************************************ +* \brief +* This function is used to arithmetically decode the reference +* parameter of a given MB. +************************************************************************ +*/ +char readRefFrame_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int x, int y) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + MotionInfoContexts *ctx = currSlice->mot_ctx; + Macroblock *neighborMB = NULL; + + int addctx = 0; + int a = 0, b = 0; + int act_ctx; + int act_sym; + PicMotion **refframe_array = dec_picture->motion.motion[list]; + + PixelPos block_a, block_b; + + p_Vid->getNeighbourPXLuma(currMB, x, y - 1, &block_b); + // TODO: this gets called with x << 2 and y << 2, so we can undo the internal >> 2 easily by just passing x and y + if (block_b.available) + { + int b8b=((block_b.x >> 3) & 0x01)+((block_b.y>>2) & 0x02); + neighborMB = &p_Vid->mb_data[block_b.mb_addr]; + if (!( (neighborMB->mb_type==IPCM) || IS_DIRECT(neighborMB) || (neighborMB->b8mode[b8b]==0 && neighborMB->b8pdir[b8b]==2))) + { + if (currSlice->mb_aff_frame_flag && (currMB->mb_field == FALSE) && (neighborMB->mb_field == TRUE)) + b = (refframe_array[block_b.pos_y>>2][block_b.pos_x>>2].ref_idx > 1 ? 2 : 0); + else + b = (refframe_array[block_b.pos_y>>2][block_b.pos_x>>2].ref_idx > 0 ? 2 : 0); + } + } + + p_Vid->getNeighbourXPLuma(currMB, x - 1, y , &block_a); + if (block_a.available) + { + int b8a=((block_a.x >> 3) & 0x01)+((block_a.y>>2) & 0x02); + neighborMB = &p_Vid->mb_data[block_a.mb_addr]; + if (!((neighborMB->mb_type==IPCM) || IS_DIRECT(neighborMB) || (neighborMB->b8mode[b8a]==0 && neighborMB->b8pdir[b8a]==2))) + { + if (currSlice->mb_aff_frame_flag && (currMB->mb_field == FALSE) && (neighborMB->mb_field == 1)) + a = (refframe_array[block_a.pos_y>>2][block_a.pos_x>>2].ref_idx > 1 ? 
1 : 0); + else + a = (refframe_array[block_a.pos_y>>2][block_a.pos_x>>2].ref_idx > 0 ? 1 : 0); + } + } + + act_ctx = a + b; + + act_sym = biari_decode_symbol(dep_dp,ctx->ref_no_contexts[addctx] + act_ctx ); + + if (act_sym != 0) + { + act_ctx = 4; + act_sym = unary_bin_decode(dep_dp,ctx->ref_no_contexts[addctx] + act_ctx,1); + ++act_sym; + } + return act_sym; +} + +// x == 0 +char readRefFrame_CABAC0(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int y) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + MotionInfoContexts *ctx = currSlice->mot_ctx; + Macroblock *neighborMB = NULL; + + int addctx = 0; + int a = 0, b = 0; + int act_ctx; + int act_sym; + PicMotion **refframe_array = dec_picture->motion.motion[list]; + + PixelPos block_a, block_b; + + p_Vid->getNeighbour0XLuma(currMB, y - 1, &block_b); + // TODO: this gets called with x << 2 and y << 2, so we can undo the internal >> 2 easily by just passing x and y + if (block_b.available) + { + int b8b=0+((block_b.y>>2) & 0x02); + neighborMB = &p_Vid->mb_data[block_b.mb_addr]; + if (!( (neighborMB->mb_type==IPCM) || IS_DIRECT(neighborMB) || (neighborMB->b8mode[b8b]==0 && neighborMB->b8pdir[b8b]==2))) + { + if (currSlice->mb_aff_frame_flag && (currMB->mb_field == FALSE) && (neighborMB->mb_field == TRUE)) + b = (refframe_array[block_b.pos_y>>2][block_b.pos_x>>2].ref_idx > 1 ? 2 : 0); + else + b = (refframe_array[block_b.pos_y>>2][block_b.pos_x>>2].ref_idx > 0 ? 
2 : 0); + } + } + + p_Vid->getNeighbourNXLuma(currMB, y , &block_a); + if (block_a.available) + { + int b8a=((15 >> 3) & 0x01)+((block_a.y>>2) & 0x02); + neighborMB = &p_Vid->mb_data[block_a.mb_addr]; + if (!((neighborMB->mb_type==IPCM) || IS_DIRECT(neighborMB) || (neighborMB->b8mode[b8a]==0 && neighborMB->b8pdir[b8a]==2))) + { + if (currSlice->mb_aff_frame_flag && (currMB->mb_field == FALSE) && (neighborMB->mb_field == 1)) + a = (refframe_array[block_a.pos_y>>2][block_a.pos_x>>2].ref_idx > 1 ? 1 : 0); + else + a = (refframe_array[block_a.pos_y>>2][block_a.pos_x>>2].ref_idx > 0 ? 1 : 0); + } + } + + act_ctx = a + b; + + act_sym = biari_decode_symbol(dep_dp,ctx->ref_no_contexts[addctx] + act_ctx ); + + if (act_sym != 0) + { + act_ctx = 4; + act_sym = unary_bin_decode(dep_dp,ctx->ref_no_contexts[addctx] + act_ctx,1); + ++act_sym; + } + return act_sym; +} + + +/*! +************************************************************************ +* \brief +* This function is used to arithmetically decode the delta qp +* of a given MB. +************************************************************************ +*/ +#if defined(_DEBUG) || !defined(_M_IX86) +short readDquant_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp) +{ + MotionInfoContexts *ctx = currSlice->mot_ctx; + short dquant; + int act_ctx = ((currSlice->last_dquant != 0) ? 1 : 0); + int act_sym = biari_decode_symbol(dep_dp,ctx->delta_qp_contexts + act_ctx ); + + if (act_sym != 0) + { + act_ctx = 2; + act_sym = unary_bin_decode(dep_dp,ctx->delta_qp_contexts + act_ctx,1); + ++act_sym; + } + + dquant = (act_sym + 1) >> 1; + if((act_sym & 0x01)==0) // lsb is signed bit + dquant = -dquant; + + currSlice->last_dquant = dquant; + return dquant; +} +#endif +/*! +************************************************************************ +* \brief +* This function is used to arithmetically decode the coded +* block pattern of a given MB. 
+************************************************************************ +*/ +int readCBP_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp) +{ + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + Slice *currSlice = currMB->p_Slice; + TextureInfoContexts *ctx = currSlice->tex_ctx; + Macroblock *neighborMB = NULL; + + int a, b; + int curr_cbp_ctx; + int cbp = 0; + int cbp_bit; + PixelPos block_a; + + // coding of luma part (bit by bit) + neighborMB = currMB->mb_up; + b = 0; + + if (neighborMB != NULL) + { + if(neighborMB->mb_type!=IPCM) + b = (( (neighborMB->cbp & 4) == 0) ? 2 : 0); + } + + p_Vid->getNeighbourLeftLuma(currMB, &block_a); + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + a = 0; + else + a = (( (p_Vid->mb_data[block_a.mb_addr].cbp & (1<<(2*(block_a.y>>3)+1))) == 0) ? 1 : 0); + } + else + a=0; + + curr_cbp_ctx = a + b; + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx ); + //if (cbp_bit) + cbp += cbp_bit;//1; + + if (neighborMB != NULL) + { + if(neighborMB->mb_type!=IPCM) + b = (( (neighborMB->cbp & 8) == 0) ? 2 : 0); + } + + a = ( ((cbp & 1) == 0) ? 1: 0); + + curr_cbp_ctx = a + b; + + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx ); + //if (cbp_bit) + cbp += (cbp_bit << 1); //2; + + b = ( ((cbp & 1) == 0) ? 2: 0); + + p_Vid->getNeighbourNPLumaNB(currMB, 8, &block_a); + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + a = 0; + else + a = (( (p_Vid->mb_data[block_a.mb_addr].cbp & (1<<(2*(block_a.y>>3)+1))) == 0) ? 1 : 0); + } + else + a=0; + + curr_cbp_ctx = a + b; + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx ); + //if (cbp_bit) + cbp += (cbp_bit << 2); //4; + + b = ( ((cbp & 2) == 0) ? 2: 0); + a = ( ((cbp & 4) == 0) ? 
1: 0); + + curr_cbp_ctx = a + b; + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx ); + //if (cbp_bit) + cbp += (cbp_bit << 3); //8; + + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444)) + { + // coding of chroma part + // CABAC decoding for BinIdx 0 + b = 0; + neighborMB = currMB->mb_up; + if (neighborMB != NULL) + { + if (neighborMB->mb_type==IPCM || (neighborMB->cbp > 15)) + b = 2; + } + + a = 0; + neighborMB = currMB->mb_left; + if (neighborMB != NULL) + { + if (neighborMB->mb_type==IPCM || (neighborMB->cbp > 15)) + a = 1; + } + + curr_cbp_ctx = a + b; + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[1] + curr_cbp_ctx ); + + // CABAC decoding for BinIdx 1 + if (cbp_bit) // set the chroma bits + { + b = 0; + neighborMB = currMB->mb_up; + if (neighborMB != NULL) + { + //if ((neighborMB->mb_type == IPCM) || ((neighborMB->cbp > 15) && ((neighborMB->cbp >> 4) == 2))) + if ((neighborMB->mb_type == IPCM) || ((neighborMB->cbp >> 4) == 2)) + b = 2; + } + + + a = 0; + neighborMB = currMB->mb_left; + if (neighborMB != NULL) + { + if ((neighborMB->mb_type == IPCM) || ((neighborMB->cbp >> 4) == 2)) + a = 1; + } + + curr_cbp_ctx = a + b; + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[2] + curr_cbp_ctx ); + cbp += (16 << cbp_bit); // ? 32 : 16; + } + } + + + if (!cbp) + { + currSlice->last_dquant = 0; + } + + return cbp; +} + +/*! +************************************************************************ +* \brief +* This function is used to arithmetically decode the chroma +* intra prediction mode of a given MB. +************************************************************************ +*/ +char readCIPredMode_CABAC(Macroblock *currMB, + DecodingEnvironmentPtr dep_dp) +{ + Slice *currSlice = currMB->p_Slice; + TextureInfoContexts *ctx = currSlice->tex_ctx; + int act_sym; + + Macroblock *MbUp = currMB->mb_up; + Macroblock *MbLeft = currMB->mb_left; + + int b = (MbUp != NULL) ? 
(((MbUp->c_ipred_mode != 0) && (MbUp->mb_type != IPCM)) ? 1 : 0) : 0; + int a = (MbLeft != NULL) ? (((MbLeft->c_ipred_mode != 0) && (MbLeft->mb_type != IPCM)) ? 1 : 0) : 0; + int act_ctx = a + b; + + act_sym = biari_decode_symbol(dep_dp, ctx->cipr_contexts + act_ctx ); + + if (act_sym != 0) + act_sym = unary_bin_max_decode(dep_dp, ctx->cipr_contexts + 3, 0, 1) + 1; + return act_sym; + +} + +static const byte maxpos [] = {15, 14, 63, 31, 31, 15, 3, 14, 7, 15, 15, 14, 63, 31, 31, 15, 15, 14, 63, 31, 31, 15}; +static const byte c1isdc [] = { 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}; +static const byte type2ctx_bcbp[] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5, 10, 11, 12, 13, 13, 14, 16, 17, 18, 19, 19, 20}; +static const byte type2ctx_map [] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; // 8 +static const byte type2ctx_last[] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; // 8 +static const byte type2ctx_one [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5, 10, 11, 12, 13, 13, 14, 16, 17, 18, 19, 19, 20}; // 7 +static const byte type2ctx_abs [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5, 10, 11, 12, 13, 13, 14, 16, 17, 18, 19, 19, 20}; // 7 +static const byte max_c2 [] = { 4, 4, 4, 4, 4, 4, 3, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; // 9 + + + +/*! 
+************************************************************************ +* \brief +* Read CBP4-BIT +************************************************************************ +*/ +static int read_and_store_CBP_block_bit_444(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + TextureInfoContexts *tex_ctx = currSlice->tex_ctx; + + int y_ac = (type==LUMA_16AC || type==LUMA_8x8 || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4 + || type==CB_16AC || type==CB_8x8 || type==CB_8x4 || type==CB_4x8 || type==CB_4x4 + || type==CR_16AC || type==CR_8x8 || type==CR_8x4 || type==CR_4x8 || type==CR_4x4); + int y_dc = (type==LUMA_16DC || type==CB_16DC || type==CR_16DC); + int u_ac = (type==CHROMA_AC && !currMB->is_v_block); + int v_ac = (type==CHROMA_AC && currMB->is_v_block); + int chroma_dc = (type==CHROMA_DC || type==CHROMA_DC_2x4 || type==CHROMA_DC_4x4); + int u_dc = (chroma_dc && !currMB->is_v_block); + int v_dc = (chroma_dc && currMB->is_v_block); + int j = (y_ac || u_ac || v_ac ? currMB->subblock_y : 0); + int i = (y_ac || u_ac || v_ac ? currMB->subblock_x : 0); + int bit = (y_dc ? 0 : y_ac ? 1 : u_dc ? 17 : v_dc ? 18 : u_ac ? 19 : 35); + int default_bit = (currMB->is_intra_block ? 
1 : 0); + int upper_bit = default_bit; + int left_bit = default_bit; + int cbp_bit = 1; // always one for 8x8 mode + int ctx; + int bit_pos_a = 0; + int bit_pos_b = 0; + + PixelPos block_a, block_b; + if (y_ac) + { + get4x4NeighbourLuma(currMB, i - 1, j , &block_a); + get4x4NeighbourLuma(currMB, i , j - 1, &block_b); + if (block_a.available) + bit_pos_a = 4*block_a.y + block_a.x; + if (block_b.available) + bit_pos_b = 4*block_b.y + block_b.x; + } + else if (y_dc) + { + get4x4NeighbourLuma(currMB, i - 1, j , &block_a); + get4x4NeighbourLuma(currMB, i , j - 1, &block_b); + } + else if (u_ac||v_ac) + { + get4x4Neighbour(currMB, i - 1, j , p_Vid->mb_size[IS_CHROMA], &block_a); + get4x4Neighbour(currMB, i , j - 1, p_Vid->mb_size[IS_CHROMA], &block_b); + if (block_a.available) + bit_pos_a = 4*block_a.y + block_a.x; + if (block_b.available) + bit_pos_b = 4*block_b.y + block_b.x; + } + else + { + get4x4Neighbour(currMB, i - 1, j , p_Vid->mb_size[IS_CHROMA], &block_a); + get4x4Neighbour(currMB, i , j - 1, p_Vid->mb_size[IS_CHROMA], &block_b); + } + + if (dec_picture->chroma_format_idc!=YUV444) + { + if (type!=LUMA_8x8) + { + //--- get bits from neighboring blocks --- + if (block_b.available) + { + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], bit + bit_pos_b); + } + + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit + bit_pos_a); + } + + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx); + } + } + else if( IS_INDEPENDENT(p_Vid) ) + { + if (type!=LUMA_8x8) + { + //--- get bits from neighbouring blocks --- + if (block_b.available) + { + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit = 1; + else + upper_bit = 
get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0],bit+bit_pos_b); + } + + + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit = 1; + else + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit+bit_pos_a); + } + + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx); + } + } + else { + if (block_b.available) + { + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + { + if(type==LUMA_8x8) + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits_8x8[0], bit + bit_pos_b); + else if (type==CB_8x8) + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits_8x8[1], bit + bit_pos_b); + else if (type==CR_8x8) + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits_8x8[2], bit + bit_pos_b); + else if ((type==CB_4x4)||(type==CB_4x8)||(type==CB_8x4)||(type==CB_16AC)||(type==CB_16DC)) + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[1],bit+bit_pos_b); + else if ((type==CR_4x4)||(type==CR_4x8)||(type==CR_8x4)||(type==CR_16AC)||(type==CR_16DC)) + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[2],bit+bit_pos_b); + else + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0],bit+bit_pos_b); + } + } + + + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + { + if(type==LUMA_8x8) + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits_8x8[0],bit+bit_pos_a); + else if (type==CB_8x8) + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits_8x8[1],bit+bit_pos_a); + else if (type==CR_8x8) + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits_8x8[2],bit+bit_pos_a); + else if ((type==CB_4x4)||(type==CB_4x8)||(type==CB_8x4)||(type==CB_16AC)||(type==CB_16DC)) + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[1],bit+bit_pos_a); + else if 
((type==CR_4x4)||(type==CR_4x8)||(type==CR_8x4)||(type==CR_16AC)||(type==CR_16DC)) + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[2],bit+bit_pos_a); + else + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit+bit_pos_a); + } + } + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx); + } + + //--- set bits for current block --- + bit = (y_dc ? 0 : y_ac ? 1 + j + (i >> 2) : u_dc ? 17 : v_dc ? 18 : u_ac ? 19 + j + (i >> 2) : 35 + j + (i >> 2)); + + if (cbp_bit) + { + if (type==LUMA_8x8) + { + currMB->cbp_bits[0] |= ((int64) 0x33 << bit ); + + if (dec_picture->chroma_format_idc==YUV444) + { + currMB->cbp_bits_8x8[0] |= ((int64) 0x33 << bit ); + } + } + else if (type==CB_8x8) + { + currMB->cbp_bits_8x8[1] |= ((int64) 0x33 << bit ); + currMB->cbp_bits[1] |= ((int64) 0x33 << bit ); + } + else if (type==CR_8x8) + { + currMB->cbp_bits_8x8[2] |= ((int64) 0x33 << bit ); + currMB->cbp_bits[2] |= ((int64) 0x33 << bit ); + } + else if (type==LUMA_8x4) + { + currMB->cbp_bits[0] |= ((int64) 0x03 << bit ); + } + else if (type==CB_8x4) + { + currMB->cbp_bits[1] |= ((int64) 0x03 << bit ); + } + else if (type==CR_8x4) + { + currMB->cbp_bits[2] |= ((int64) 0x03 << bit ); + } + else if (type==LUMA_4x8) + { + currMB->cbp_bits[0] |= ((int64) 0x11<< bit ); + } + else if (type==CB_4x8) + { + currMB->cbp_bits[1] |= ((int64)0x11<< bit ); + } + else if (type==CR_4x8) + { + currMB->cbp_bits[2] |= ((int64)0x11<< bit ); + } + else if ((type==CB_4x4)||(type==CB_16AC)||(type==CB_16DC)) + { + currMB->cbp_bits[1] |= ((int64)0x01<<bit); + } + else if ((type==CR_4x4)||(type==CR_16AC)||(type==CR_16DC)) + { + currMB->cbp_bits[2] |= ((int64)0x01<<bit); + } + else + { + currMB->cbp_bits[0] |= ((int64)0x01<<bit); + } + } + return cbp_bit; +} + + + +/*! 
+************************************************************************ +* \brief +* Read CBP4-BIT +************************************************************************ +*/ +static int read_and_store_CBP_block_bit_normal(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + TextureInfoContexts *tex_ctx = currSlice->tex_ctx; + int cbp_bit = 1; // always one for 8x8 mode + + if (type==LUMA_16DC) + { + + int upper_bit = 1; + int left_bit = 1; + int ctx; + + PixelPos block_a, block_b; + + //--- get bits from neighboring blocks --- + p_Vid->getNeighbour0X(currMB, -1, p_Vid->mb_size[IS_LUMA], &block_b); + if (block_b.available) + { + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = (int)p_Vid->mb_data[block_b.mb_addr].cbp_bits[0]&1; + } + + p_Vid->getNeighbourX0(currMB, -1, p_Vid->mb_size[IS_LUMA], &block_a); + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = (int)p_Vid->mb_data[block_a.mb_addr].cbp_bits[0]&1; + } + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[LUMA_16DC]] + ctx); + + //--- set bits for current block --- + + if (cbp_bit) + { + currMB->cbp_bits[0] |= 0x01; + } + } + else if (type == LUMA_8x8) + { + int j = currMB->subblock_y; + int i = currMB->subblock_x; + + //--- set bits for current block --- + int bit = 1 + j + (i >> 2); + + or_bits(&currMB->cbp_bits[0], 0x33, bit); + } + else if (type <= LUMA_4x4) // type==LUMA_16AC || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4) + { + int j = currMB->subblock_y; + int i = currMB->subblock_x; + int bit; + int default_bit = (currMB->is_intra_block ? 
1 : 0); + int upper_bit = default_bit; + int left_bit = default_bit; + int ctx; + + //--- get bits from neighboring blocks --- + PixelPos block_a, block_b; + p_Vid->getNeighbourPXLumaNB_NoPos(currMB, j-1, &block_b); + if (block_b.available) + { + int bit_pos_b = (block_b.y&((short)~3)) + (i>>2); + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], 1 + bit_pos_b); + } + + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, i-1, j, &block_a); + if (block_a.available) + { + int bit_pos_a = (block_a.y&((short)~3)) + (block_a.x>>2); + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],1 + bit_pos_a); + } + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx); + + + //--- set bits for current block --- + bit = 1 + j + (i >> 2); + + if (cbp_bit) + { + if (type==LUMA_8x4) + { + or_bits_low(&currMB->cbp_bits[0], 0x03, bit); + } + else if (type==LUMA_4x8) + { + or_bits_low(&currMB->cbp_bits[0], 0x011, bit); + } + else + { + or_bits_low(&currMB->cbp_bits[0], 0x01, bit); + } + } + } + else if (type == CHROMA_AC) + { + int u_ac = !currMB->is_v_block; + + int default_bit = (currMB->is_intra_block ? 1 : 0); + int upper_bit = default_bit; + int left_bit = default_bit; + int ctx; + + PixelPos block_a, block_b; + + int j = currMB->subblock_y; + int i = currMB->subblock_x; + int bit = (u_ac ? 
19 : 35); + + p_Vid->getNeighbourXP_NoPos(currMB, i - 1, j , p_Vid->mb_size[IS_CHROMA], &block_a); + p_Vid->getNeighbourPX_NoPos(currMB, i , j - 1, p_Vid->mb_size[IS_CHROMA], &block_b); + + //--- get bits from neighboring blocks --- + if (block_b.available) + { + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], bit + (block_b.y&((short)~3)) + (block_b.x>>2)); + } + + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit + (block_a.y&((short)~3)) + (block_a.x>>2)); + } + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[CHROMA_AC]] + ctx); + + + //--- set bits for current block --- + if (cbp_bit) + { + or_bits(&currMB->cbp_bits[0], 0x01, bit + j + (i >> 2)); + } + + } + else if (type <= CHROMA_DC_4x4) + { + int v_dc = currMB->is_v_block; + int default_bit = (currMB->is_intra_block ? 1 : 0); + int upper_bit = default_bit; + int left_bit = default_bit; + int ctx; + + + PixelPos block_a, block_b; + + int bit = (v_dc ? 
18 : 17); + p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[IS_CHROMA], &block_a); + p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &block_b); + //--- get bits from neighboring blocks --- + if (block_b.available) + { + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], bit); + } + + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit); + } + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx); + + + //--- set bits for current block --- + if (cbp_bit) + { + or_bits(&currMB->cbp_bits[0], 0x01, bit); + } + + + } + else + { + int default_bit = (currMB->is_intra_block ? 1 : 0); + int upper_bit = default_bit; + int left_bit = default_bit; + int ctx; + + + PixelPos block_a, block_b; + + p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[IS_CHROMA], &block_a); + p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &block_b); + //--- get bits from neighboring blocks --- + if (block_b.available) + { + if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], 35); + } + + if (block_a.available) + { + if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],35); + } + + ctx = 2 * upper_bit + left_bit; + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx); + + + //--- set bits for current block --- + if (cbp_bit) + { + or_bits(&currMB->cbp_bits[0], 0x01, 35); + } + + + } + return cbp_bit; +} + + +void set_read_and_store_CBP(Macroblock **currMB, int chroma_format_idc) +{ + if (chroma_format_idc == YUV444) + 
(*currMB)->read_and_store_CBP_block_bit = read_and_store_CBP_block_bit_444; + else + (*currMB)->read_and_store_CBP_block_bit = read_and_store_CBP_block_bit_normal; +} + + + + + +//===== position -> ctx for MAP ===== +//--- zig-zag scan ---- +static const byte pos2ctx_map8x8 [] = { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, +4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9, 10, 9, 8, 7, +7, 6, 11, 12, 13, 11, 6, 7, 8, 9, 14, 10, 9, 8, 6, 11, +12, 13, 11, 6, 9, 14, 10, 9, 11, 12, 13, 11 ,14, 10, 12, 14}; // 15 CTX +static const byte pos2ctx_map8x4 [] = { 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 9, 8, 6, 7, 8, +9, 10, 11, 9, 8, 6, 12, 8, 9, 10, 11, 9, 13, 13, 14, 14}; // 15 CTX +static const byte pos2ctx_map4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14}; // 15 CTX +static const byte pos2ctx_map2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX +static const byte pos2ctx_map4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX +static const byte* pos2ctx_map [] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8, pos2ctx_map8x4, +pos2ctx_map8x4, pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4, +pos2ctx_map2x4c, pos2ctx_map4x4c, +pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8,pos2ctx_map8x4, +pos2ctx_map8x4, pos2ctx_map4x4, +pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8,pos2ctx_map8x4, +pos2ctx_map8x4,pos2ctx_map4x4}; +//--- interlace scan ---- +//taken from ABT +static const byte pos2ctx_map8x8i[] = { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, +6, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 11, 12, 11, +9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 13, 13, 9, +9, 10, 10, 8, 13, 13, 9, 9, 10, 10, 14, 14, 14, 14, 14, 14}; // 15 CTX +static const byte pos2ctx_map8x4i[] = { 0, 1, 2, 3, 4, 5, 6, 3, 4, 5, 6, 3, 4, 7, 6, 8, +9, 7, 6, 8, 9, 10, 11, 12, 12, 10, 11, 13, 13, 14, 14, 14}; // 15 CTX +static const byte pos2ctx_map4x8i[] = { 0, 1, 1, 1, 2, 3, 3, 4, 4, 4, 5, 6, 2, 7, 7, 8, +8, 8, 5, 6, 9, 10, 10, 11, 11, 11, 12, 13, 13, 14, 14, 
14}; // 15 CTX +static const byte* pos2ctx_map_int[] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i, +pos2ctx_map4x8i,pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4, +pos2ctx_map2x4c, pos2ctx_map4x4c, +pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i, +pos2ctx_map8x4i,pos2ctx_map4x4, +pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i, +pos2ctx_map8x4i,pos2ctx_map4x4}; + +//===== position -> ctx for LAST ===== +static const byte pos2ctx_last8x8 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, +5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8}; // 9 CTX +static const byte pos2ctx_last8x4 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, +3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8}; // 9 CTX + +static const byte pos2ctx_last4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; // 15 CTX +static const byte pos2ctx_last2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX +static const byte pos2ctx_last4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX +static const byte* pos2ctx_last [] = {pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8, pos2ctx_last8x4, +pos2ctx_last8x4, pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last4x4, +pos2ctx_last2x4c, pos2ctx_last4x4c, +pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8,pos2ctx_last8x4, +pos2ctx_last8x4, pos2ctx_last4x4, +pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8,pos2ctx_last8x4, +pos2ctx_last8x4, pos2ctx_last4x4}; + + + +/*! 
+************************************************************************ +* \brief +* Read Significance MAP +************************************************************************ +*/ + +#if defined(_DEBUG) || defined(_M_X64) +static int read_significance_map(TextureInfoContexts *tex_ctx, const Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type, int16_t coeff[]) +{ + int i; + int coeff_ctr = 0; + int i0 = 0; + int i1 = maxpos[type]; + const VideoParameters *p_Vid = currMB->p_Vid; + + int fld = ( p_Vid->structure!=FRAME || currMB->mb_field ); + const byte *pos2ctx_Map = (fld) ? pos2ctx_map_int[type] : pos2ctx_map[type]; + const byte *last = pos2ctx_last[type]; + + BiContextTypePtr map_ctx = tex_ctx->map_contexts[fld][type2ctx_map [type]]; + BiContextTypePtr last_ctx = tex_ctx->last_contexts[fld][type2ctx_last[type]]; + + if (!c1isdc[type]) + { + pos2ctx_Map++; + last++; + } + + for (i=0; i < i1; ++i) // if last coeff is reached, it has to be significant + { + //--- read significance symbol --- + if (biari_decode_symbol (dep_dp, map_ctx + pos2ctx_Map[i])) + { + coeff[i] = 1; + ++coeff_ctr; + //--- read last coefficient symbol --- + if (biari_decode_symbol (dep_dp, last_ctx + last[i])) + { + while (i++ < i1) + { + coeff[i] = 0; + } + return coeff_ctr; + //memset(&coeff[i + 1], 0, (i1 - i) * sizeof(int)); + //i = i1; + } + } + else + { + coeff[i] = 0; + } + } + //--- last coefficient must be significant if no last symbol was received --- + coeff[i] = 1; + + + return coeff_ctr+1; +} +#endif +/*! +************************************************************************ +* \brief +* Read Levels +************************************************************************ +*/ +#if defined(_DEBUG) || defined(_M_X64) +/*! 
+************************************************************************ +* \brief +* Exp-Golomb decoding for LEVELS +*********************************************************************** +*/ +unsigned int exp_golomb_decode_eq_prob( DecodingEnvironmentPtr dep_dp, int k); +static unsigned int unary_exp_golomb_level_decode( DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx) +{ + unsigned int symbol = biari_decode_symbol(dep_dp, ctx ); + + if (symbol==0) + return 0; + else + { + const unsigned int exp_start = 13; + + for (symbol=0;symbol<(exp_start-1);symbol++) + { + if (!biari_decode_symbol(dep_dp, ctx)) + return symbol; + } + return exp_golomb_decode_eq_prob(dep_dp,0)+13; + } +} + +static void read_significant_coefficients (TextureInfoContexts *tex_ctx, + DecodingEnvironmentPtr dep_dp, + int type, + int16_t coeff[]) +{ + static const int plus_one_clip4[5] = { 1, 2, 3, 4, 4 }; + static const int plus_one_clip3[4] = { 1, 2, 3, 3 }; + const int *c2_clip = (max_c2[type]==4)?plus_one_clip4:plus_one_clip3; + int i; + int c1 = 1; + int c2 = 0; + BiContextType *one_contexts = tex_ctx->one_contexts[type2ctx_one[type]]; + BiContextType *abs_contexts = tex_ctx->abs_contexts[type2ctx_abs[type]]; + + for (i=maxpos[type]; i>=0; i--) + { + if (coeff[i]!=0) + { + coeff[i] += biari_decode_symbol (dep_dp, one_contexts + c1); + if (coeff[i]==2) + { + coeff[i] += unary_exp_golomb_level_decode (dep_dp, abs_contexts + c2); + c2 = c2_clip[c2]; + c1=0; + } + else if (c1) + { + c1 = plus_one_clip4[c1]; + } + if (biari_decode_symbol_eq_prob(dep_dp)) + { + coeff[i] *= -1; + } + } + } +} +#else +void read_significant_coefficients (TextureInfoContexts *tex_ctx, + DecodingEnvironmentPtr dep_dp, + int type, + int coeff[]); +#endif + +/*! 
+************************************************************************ +* \brief +* Read Block-Transform Coefficients +************************************************************************ +*/ +#if defined(_DEBUG) || defined(_M_X64) +RunLevel readRunLevel_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int context) +{ + RunLevel rl; + Slice *currSlice = currMB->p_Slice; + //--- read coefficients for whole block --- + if (currSlice->coeff_ctr < 0) + { + //===== decode CBP-BIT ===== + if ((currSlice->coeff_ctr = currMB->read_and_store_CBP_block_bit (currMB, dep_dp, context) )!=0) + { + //===== decode significance map ===== + currSlice->coeff_ctr = read_significance_map (currSlice->tex_ctx, currMB, dep_dp, context, currSlice->coeff); + + //===== decode significant coefficients ===== + read_significant_coefficients (currSlice->tex_ctx, dep_dp, context, currSlice->coeff); + } + } + + //--- set run and level --- + + rl.run=0; + if (currSlice->coeff_ctr--) + { + //--- set run and level (coefficient) --- + for (; currSlice->coeff[currSlice->pos] == 0; ++currSlice->pos, ++rl.run); + rl.level = currSlice->coeff[currSlice->pos++]; + //--- decrement coefficient counter and re-set position --- + if (currSlice->coeff_ctr == 0) + currSlice->pos = 0; + return rl; + } + else + { + //--- set run and level (EOB) --- + currSlice->pos = 0; + rl.level = 0; + return rl; + } +} +#endif +/*! +************************************************************************ +* \brief +* arideco_bits_read +************************************************************************ +*/ +static int arideco_bits_read(const DecodingEnvironmentPtr dep) +{ + int tmp = ((*dep->Dcodestrm_len) << 3) - dep->DbitsLeft; + +#if (2==TRACE) + fprintf(p_trace, "tmp: %d\n", tmp); +#endif + return tmp; +} + +/*! 
+************************************************************************ +* \brief +* decoding of unary binarization using one or 2 distinct +* models for the first and all remaining bins; no terminating +* "0" for max_symbol +*********************************************************************** +*/ +static unsigned int unary_bin_max_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + int ctx_offset, + unsigned int max_symbol) +{ + unsigned int symbol = biari_decode_symbol(dep_dp, ctx ); + + if (symbol==0 || (max_symbol == 0)) + return symbol; + else + { + unsigned int l; + ctx += ctx_offset; + symbol = 0; + do + { + l = biari_decode_symbol(dep_dp, ctx); + ++symbol; + } + while( (l != 0) && (symbol < max_symbol) ); + + if ((l != 0) && (symbol == max_symbol)) + ++symbol; + return symbol; + } +} + + +/*! +************************************************************************ +* \brief +* decoding of unary binarization using one or 2 distinct +* models for the first and all remaining bins +*********************************************************************** +*/ +static unsigned int unary_bin_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + int ctx_offset) +{ + unsigned int symbol = biari_decode_symbol(dep_dp, ctx ); + + if (symbol == 0) + return 0; + else + { + unsigned int l; + ctx += ctx_offset;; + symbol = 0; + do + { + l=biari_decode_symbol(dep_dp, ctx); + ++symbol; + } + while( l != 0 ); + return symbol; + } +} + + +/*! +************************************************************************ +* \brief +* finding end of a slice in case this is not the end of a frame +* +* Unsure whether the "correction" below actually solves an off-by-one +* problem or whether it introduces one in some cases :-( Anyway, +* with this change the bit stream format works with CABAC again. 
+* StW, 8.7.02 +************************************************************************ +*/ +int cabac_startcode_follows(Slice *currSlice, int eos_bit) +{ + unsigned int bit; + + if( eos_bit ) + { + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + DataPartition *dP = &(currSlice->partArr[partMap[SE_MBTYPE]]); + DecodingEnvironmentPtr dep_dp = &(dP->de_cabac); + + bit = biari_decode_final (dep_dp); //GB + +#if TRACE + fprintf(p_trace, "@%-6d %-63s (%3d)\n",symbolCount++, "end_of_slice_flag", bit); + fflush(p_trace); +#endif + } + else + { + bit = 0; + } + + return bit; +} + +/*! +************************************************************************ +* \brief +* Exp Golomb binarization and decoding of a symbol +* with prob. of 0.5r +************************************************************************ +*/ +unsigned int exp_golomb_decode_eq_prob( DecodingEnvironmentPtr dep_dp, int k) +{ + unsigned int l; + int symbol = 0; + int binary_symbol = 0; + + do + { + l = biari_decode_symbol_eq_prob(dep_dp); + if (l) // always returns 1 or zero + { + symbol += (l<<k); // l is guaranteed to be one + ++k; + } + } + while (l!=0); + + while (k--) //next binary part + if (biari_decode_symbol_eq_prob(dep_dp)==1) + binary_symbol |= (1<<k); + + return (unsigned int) (symbol + binary_symbol); +} + +/*! 
+************************************************************************ +* \brief +* Exp-Golomb decoding for Motion Vectors +*********************************************************************** +*/ +#if defined(_DEBUG) || defined(_M_X64) +unsigned int unary_exp_golomb_mv_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + unsigned int max_bin) +{ + unsigned int symbol = biari_decode_symbol(dep_dp, ctx ); + + if (symbol == 0) + return 0; + else + { + const unsigned int exp_start = 8; + + ++ctx; + for (symbol=1;symbol<exp_start;) + { + if (!biari_decode_symbol(dep_dp, ctx)) + return symbol; + if ((++symbol)==2) ctx++; + if (symbol==max_bin) + ++ctx; + } + + return exp_start + exp_golomb_decode_eq_prob(dep_dp,3); + } +} +unsigned int unary_exp_golomb_mv_decode3(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx) +{ + unsigned int max_bin = 3; + unsigned int symbol = biari_decode_symbol(dep_dp, ctx ); + + if (symbol == 0) + return 0; + else + { + const unsigned int exp_start = 8; + + ++ctx; + for (symbol=1;symbol<exp_start;) + { + if (!biari_decode_symbol(dep_dp, ctx)) + return symbol; + if ((++symbol)==2) ctx++; + if (symbol==max_bin) + ++ctx; + } + + return exp_start + exp_golomb_decode_eq_prob(dep_dp,3); + } +} +#endif + +/*! 
+************************************************************************ +* \brief +* Read I_PCM macroblock +************************************************************************ +*/ +void readIPCM_CABAC(Slice *currSlice, struct datapartition *dP) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + Bitstream* currStream = dP->bitstream; + DecodingEnvironmentPtr dep = &(dP->de_cabac); + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBits = (dP->bitstream->bitstream_length << 3) + 7; + + int val = 0; + + int bits_read = 0; + int bitoffset, bitdepth; + int uv, i, j; + + while (dep->DbitsLeft >= 8) + { + dep->Dvalue >>= 8; + dep->DbitsLeft -= 8; + (*dep->Dcodestrm_len)--; + } + + bitoffset = (*dep->Dcodestrm_len) << 3; + + // read luma values + bitdepth = p_Vid->bitdepth_luma; + for(i=0;i<MB_BLOCK_SIZE;++i) + { + for(j=0;j<MB_BLOCK_SIZE;++j) + { + bits_read += GetBits(buf, bitoffset, &val, BitstreamLengthInBits, bitdepth); + currSlice->ipcm[0][i][j] = val; + bitoffset += bitdepth; + } + } + + // read chroma values + bitdepth = p_Vid->bitdepth_chroma; + if ((dec_picture->chroma_format_idc != YUV400) && !IS_INDEPENDENT(p_Vid)) + { + for (uv=1; uv<3; ++uv) + { + for(i=0;i<p_Vid->mb_cr_size_y;++i) + { + for(j=0;j<p_Vid->mb_cr_size_x;++j) + { + bits_read += GetBits(buf, bitoffset, &val, BitstreamLengthInBits, bitdepth); + currSlice->ipcm[uv][i][j] = val; + bitoffset += bitdepth; + } + } + } + } + + (*dep->Dcodestrm_len) += ( bits_read >> 3); + if (bits_read & 7) + { + ++(*dep->Dcodestrm_len); + } +} + diff --git a/Src/h264dec/ldecod/src/context_ini.c b/Src/h264dec/ldecod/src/context_ini.c new file mode 100644 index 00000000..2ffcfeef --- /dev/null +++ b/Src/h264dec/ldecod/src/context_ini.c @@ -0,0 +1,123 @@ + +/*! 
+ ************************************************************************************* + * \file context_ini.c + * + * \brief + * CABAC context initializations + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe <marpe@hhi.de> + * - Heiko Schwarz <hschwarz@hhi.de> + ************************************************************************************** + */ + +#define CONTEXT_INI_C + +#include "defines.h" +#include "global.h" +#include "biaridecod.h" +#include "ctx_tables.h" + + +#define IBIARI_CTX_INIT2(ii,jj,ctx,tab,num, qp) \ +{ \ + for (i=0; i<ii; ++i) \ + for (j=0; j<jj; ++j) \ + { \ + biari_init_context (qp, &(ctx[i][j]), tab ## _I[num][i][j]); \ + } \ +} + +#define PBIARI_CTX_INIT2(ii,jj,ctx,tab,num, qp) \ +{ \ + for (i=0; i<ii; ++i) \ + for (j=0; j<jj; ++j) \ + { \ + biari_init_context (qp, &(ctx[i][j]), tab ## _P[num][i][j]); \ + } \ +} + + +#define IBIARI_CTX_INIT1(jj,ctx,tab,num, qp) \ +{ \ + for (j=0; j<jj; ++j) \ + { \ + biari_init_context (qp, &(ctx[j]), tab ## _I[num][0][j]); \ + } \ +} + + +#define PBIARI_CTX_INIT1(jj,ctx,tab,num, qp) \ +{ \ + { \ + for (j=0; j<jj; ++j) \ + { \ + biari_init_context (qp, &(ctx[j]), tab ## _P[num][0][j]); \ + } \ + } \ +} + +void init_contexts (Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + MotionInfoContexts* mc = currSlice->mot_ctx; + TextureInfoContexts* tc = currSlice->tex_ctx; + int i, j; + int qp = imax(0, p_Vid->qp); + int model_number = currSlice->model_number; + + //printf("%d -", p_Vid->currentSlice->model_number); + + //--- motion coding contexts --- + if ((currSlice->slice_type == I_SLICE)||(currSlice->slice_type == SI_SLICE)) + { + IBIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX, mc->mb_type_contexts, INIT_MB_TYPE, model_number, qp); + IBIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX, mc->b8_type_contexts, INIT_B8_TYPE, model_number, qp); + IBIARI_CTX_INIT2 (2, NUM_MV_RES_CTX, mc->mv_res_contexts, INIT_MV_RES, model_number, qp); + 
IBIARI_CTX_INIT2 (2, NUM_REF_NO_CTX, mc->ref_no_contexts, INIT_REF_NO, model_number, qp); + IBIARI_CTX_INIT1 ( NUM_DELTA_QP_CTX, mc->delta_qp_contexts, INIT_DELTA_QP, model_number, qp); + IBIARI_CTX_INIT1 ( NUM_MB_AFF_CTX, mc->mb_aff_contexts, INIT_MB_AFF, model_number, qp); + + //--- texture coding contexts --- + IBIARI_CTX_INIT1 ( NUM_TRANSFORM_SIZE_CTX, tc->transform_size_contexts, INIT_TRANSFORM_SIZE, model_number, qp); + IBIARI_CTX_INIT1 ( NUM_IPR_CTX, tc->ipr_contexts, INIT_IPR, model_number, qp); + IBIARI_CTX_INIT1 ( NUM_CIPR_CTX, tc->cipr_contexts, INIT_CIPR, model_number, qp); + IBIARI_CTX_INIT2 (3, NUM_CBP_CTX, tc->cbp_contexts, INIT_CBP, model_number, qp); + IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tc->bcbp_contexts, INIT_BCBP, model_number, qp); + IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[0], INIT_MAP, model_number, qp); +#if ENABLE_FIELD_CTX + IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[1], INIT_FLD_MAP, model_number, qp); + IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[1], INIT_FLD_LAST, model_number, qp); +#endif + IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[0], INIT_LAST, model_number, qp); + IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX, tc->one_contexts, INIT_ONE, model_number, qp); + IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX, tc->abs_contexts, INIT_ABS, model_number, qp); + } + else + { + PBIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX, mc->mb_type_contexts, INIT_MB_TYPE, model_number, qp); + PBIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX, mc->b8_type_contexts, INIT_B8_TYPE, model_number, qp); + PBIARI_CTX_INIT2 (2, NUM_MV_RES_CTX, mc->mv_res_contexts, INIT_MV_RES, model_number, qp); + PBIARI_CTX_INIT2 (2, NUM_REF_NO_CTX, mc->ref_no_contexts, INIT_REF_NO, model_number, qp); + PBIARI_CTX_INIT1 ( NUM_DELTA_QP_CTX, mc->delta_qp_contexts, INIT_DELTA_QP, model_number, qp); + PBIARI_CTX_INIT1 ( NUM_MB_AFF_CTX, mc->mb_aff_contexts, INIT_MB_AFF, model_number, qp); + + //--- 
texture coding contexts --- + PBIARI_CTX_INIT1 ( NUM_TRANSFORM_SIZE_CTX, tc->transform_size_contexts, INIT_TRANSFORM_SIZE, model_number, qp); + PBIARI_CTX_INIT1 ( NUM_IPR_CTX, tc->ipr_contexts, INIT_IPR, model_number, qp); + PBIARI_CTX_INIT1 ( NUM_CIPR_CTX, tc->cipr_contexts, INIT_CIPR, model_number, qp); + PBIARI_CTX_INIT2 (3, NUM_CBP_CTX, tc->cbp_contexts, INIT_CBP, model_number, qp); + PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tc->bcbp_contexts, INIT_BCBP, model_number, qp); + PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[0], INIT_MAP, model_number, qp); +#if ENABLE_FIELD_CTX + PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[1], INIT_FLD_MAP, model_number, qp); + PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[1], INIT_FLD_LAST, model_number, qp); +#endif + PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[0], INIT_LAST, model_number, qp); + PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX, tc->one_contexts, INIT_ONE, model_number, qp); + PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX, tc->abs_contexts, INIT_ABS, model_number, qp); + } +} + diff --git a/Src/h264dec/ldecod/src/erc_api.c b/Src/h264dec/ldecod/src/erc_api.c new file mode 100644 index 00000000..48e827a0 --- /dev/null +++ b/Src/h264dec/ldecod/src/erc_api.c @@ -0,0 +1,371 @@ + +/*! + ************************************************************************************* + * \file erc_api.c + * + * \brief + * External (still inside video decoder) interface for error concealment module + * + * \author + * - Ari Hourunranta <ari.hourunranta@nokia.com> + * - Viktor Varsa <viktor.varsa@nokia.com> + * - Ye-Kui Wang <wyk@ieee.org> + * + ************************************************************************************* + */ + + +#include "global.h" +#include "memalloc.h" +#include "erc_api.h" + +/*! 
+ ************************************************************************ + * \brief + * Initinize the error concealment module + ************************************************************************ + */ +void ercInit(VideoParameters *p_Vid, int pic_sizex, int pic_sizey, int flag) +{ + ercClose(p_Vid, p_Vid->erc_errorVar); + p_Vid->erc_object_list = (objectBuffer_t *) calloc((pic_sizex * pic_sizey) >> 6, sizeof(objectBuffer_t)); + if (p_Vid->erc_object_list == NULL) no_mem_exit("ercInit: erc_object_list"); + + // the error concealment instance is allocated + p_Vid->erc_errorVar = ercOpen(); + + // set error concealment ON + ercSetErrorConcealment(p_Vid->erc_errorVar, flag); +} + +/*! + ************************************************************************ + * \brief + * Allocates data structures used in error concealment. + *\return + * The allocated ercVariables_t is returned. + ************************************************************************ + */ +ercVariables_t *ercOpen( void ) +{ + ercVariables_t *errorVar = NULL; + + errorVar = (ercVariables_t *)malloc( sizeof(ercVariables_t)); + if ( errorVar == NULL ) no_mem_exit("ercOpen: errorVar"); + + errorVar->nOfMBs = 0; + errorVar->segments = NULL; + errorVar->currSegment = 0; + errorVar->yCondition = NULL; + errorVar->uCondition = NULL; + errorVar->vCondition = NULL; + errorVar->prevFrameYCondition = NULL; + + errorVar->concealment = 1; + + return errorVar; +} + +/*! + ************************************************************************ + * \brief + * Resets the variables used in error detection. + * Should be called always when starting to decode a new frame. + * \param errorVar + * Variables for error concealment + * \param nOfMBs + * Number of macroblocks in a frame + * \param numOfSegments + * Estimated number of segments (memory reserved) + * \param picSizeX + * Width of the frame in pixels. 
+ ************************************************************************ + */ +void ercReset( ercVariables_t *errorVar, int nOfMBs, int numOfSegments, int picSizeX ) +{ + if ( errorVar && errorVar->concealment ) + { + int i = 0; + + // If frame size has been changed + if ( nOfMBs != errorVar->nOfMBs && errorVar->yCondition != NULL ) + { + free( errorVar->yCondition ); + errorVar->yCondition = NULL; + free( errorVar->prevFrameYCondition ); + errorVar->prevFrameYCondition = NULL; + free( errorVar->uCondition ); + errorVar->uCondition = NULL; + free( errorVar->vCondition ); + errorVar->vCondition = NULL; + free( errorVar->segments ); + errorVar->segments = NULL; + } + + // If the structures are uninitialized (first frame, or frame size is changed) + if ( errorVar->yCondition == NULL ) + { + errorVar->segments = (ercSegment_t *)malloc( numOfSegments*sizeof(ercSegment_t) ); + if ( errorVar->segments == NULL ) no_mem_exit("ercReset: errorVar->segments"); + memset( errorVar->segments, 0, numOfSegments*sizeof(ercSegment_t)); + errorVar->nOfSegments = numOfSegments; + + errorVar->yCondition = (int *)malloc( 4*nOfMBs*sizeof(int) ); + if ( errorVar->yCondition == NULL ) no_mem_exit("ercReset: errorVar->yCondition"); + errorVar->prevFrameYCondition = (int *)malloc( 4*nOfMBs*sizeof(int) ); + if ( errorVar->prevFrameYCondition == NULL ) no_mem_exit("ercReset: errorVar->prevFrameYCondition"); + errorVar->uCondition = (int *)malloc( nOfMBs*sizeof(int) ); + if ( errorVar->uCondition == NULL ) no_mem_exit("ercReset: errorVar->uCondition"); + errorVar->vCondition = (int *)malloc( nOfMBs*sizeof(int) ); + if ( errorVar->vCondition == NULL ) no_mem_exit("ercReset: errorVar->vCondition"); + errorVar->nOfMBs = nOfMBs; + } + else + { + // Store the yCondition struct of the previous frame + int *tmp = errorVar->prevFrameYCondition; + errorVar->prevFrameYCondition = errorVar->yCondition; + errorVar->yCondition = tmp; + } + + // Reset tables and parameters + memset( errorVar->yCondition, 0, 
4*nOfMBs*sizeof(*errorVar->yCondition)); + memset( errorVar->uCondition, 0, nOfMBs*sizeof(*errorVar->uCondition)); + memset( errorVar->vCondition, 0, nOfMBs*sizeof(*errorVar->vCondition)); + + if (errorVar->nOfSegments != numOfSegments) + { + free( errorVar->segments ); + errorVar->segments = NULL; + errorVar->segments = (ercSegment_t *)malloc( numOfSegments*sizeof(ercSegment_t) ); + if ( errorVar->segments == NULL ) no_mem_exit("ercReset: errorVar->segments"); + errorVar->nOfSegments = numOfSegments; + } + + memset( errorVar->segments, 0, errorVar->nOfSegments*sizeof(ercSegment_t)); + + for ( ; i < errorVar->nOfSegments; i++ ) + { + errorVar->segments[i].fCorrupted = 1; //! mark segments as corrupted + errorVar->segments[i].startMBPos = 0; + errorVar->segments[i].endMBPos = nOfMBs - 1; + } + + errorVar->currSegment = 0; + errorVar->nOfCorruptedSegments = 0; + } +} + +/*! + ************************************************************************ + * \brief + * Resets the variables used in error detection. + * Should be called always when starting to decode a new frame. + * \param p_Vid + * VideoParameters variable + * \param errorVar + * Variables for error concealment + ************************************************************************ + */ +void ercClose(VideoParameters *p_Vid, ercVariables_t *errorVar ) +{ + if ( errorVar != NULL ) + { + if (errorVar->yCondition != NULL) + { + free( errorVar->segments ); + free( errorVar->yCondition ); + free( errorVar->uCondition ); + free( errorVar->vCondition ); + free( errorVar->prevFrameYCondition ); + } + free( errorVar ); + errorVar = NULL; + } + + if ( p_Vid && p_Vid->erc_object_list) + { + free(p_Vid->erc_object_list); + p_Vid->erc_object_list=NULL; + } +} + +/*! + ************************************************************************ + * \brief + * Sets error concealment ON/OFF. 
Can be invoked only between frames, not during a frame + * \param errorVar + * Variables for error concealment + * \param value + * New value + ************************************************************************ + */ +void ercSetErrorConcealment( ercVariables_t *errorVar, int value ) +{ + if ( errorVar != NULL ) + errorVar->concealment = value; +} + +/*! + ************************************************************************ + * \brief + * Creates a new segment in the segment-list, and marks the start MB and bit position. + * If the end of the previous segment was not explicitly marked by "ercStopSegment", + * also marks the end of the previous segment. + * If needed, it reallocates the segment-list for a larger storage place. + * \param currMBNum + * The MB number where the new slice/segment starts + * \param segment + * Segment/Slice No. counted by the caller + * \param bitPos + * Bitstream pointer: number of bits read from the buffer. + * \param errorVar + * Variables for error detector + ************************************************************************ + */ +void ercStartSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar ) +{ + if ( errorVar && errorVar->concealment ) + { + errorVar->currSegmentCorrupted = 0; + + errorVar->segments[ segment ].fCorrupted = 0; + errorVar->segments[ segment ].startMBPos = currMBNum; + } +} + +/*! + ************************************************************************ + * \brief + * Marks the end position of a segment. + * \param currMBNum + * The last MB number of the previous segment + * \param segment + * Segment/Slice No. counted by the caller + * If (segment<0) the internal segment counter is used. + * \param bitPos + * Bitstream pointer: number of bits read from the buffer. 
+ * \param errorVar + * Variables for error detector + ************************************************************************ + */ +void ercStopSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar ) +{ + if ( errorVar && errorVar->concealment ) + { + errorVar->segments[ segment ].endMBPos = currMBNum; //! Changed TO 12.11.2001 + errorVar->currSegment++; + } +} + +/*! + ************************************************************************ + * \brief + * Marks the current segment (the one which has the "currMBNum" MB in it) + * as lost: all the blocks of the MBs in the segment as corrupted. + * \param picSizeX + * Width of the frame in pixels. + * \param errorVar + * Variables for error detector + ************************************************************************ + */ +void ercMarkCurrSegmentLost(int picSizeX, ercVariables_t *errorVar ) +{ + if ( errorVar && errorVar->concealment ) + { + int current_segment = errorVar->currSegment-1, j; + + if (errorVar->currSegmentCorrupted == 0) + { + errorVar->nOfCorruptedSegments++; + errorVar->currSegmentCorrupted = 1; + } + + for ( j = errorVar->segments[current_segment].startMBPos; j <= errorVar->segments[current_segment].endMBPos; j++ ) + { + errorVar->yCondition[MBNum2YBlock (j, 0, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->yCondition[MBNum2YBlock (j, 1, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->yCondition[MBNum2YBlock (j, 2, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->yCondition[MBNum2YBlock (j, 3, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->uCondition[j] = ERC_BLOCK_CORRUPTED; + errorVar->vCondition[j] = ERC_BLOCK_CORRUPTED; + } + errorVar->segments[current_segment].fCorrupted = 1; + } +} + +/*! + ************************************************************************ + * \brief + * Marks the current segment (the one which has the "currMBNum" MB in it) + * as OK: all the blocks of the MBs in the segment as OK. + * \param picSizeX + * Width of the frame in pixels. 
+ * \param errorVar + * Variables for error detector + ************************************************************************ + */ +void ercMarkCurrSegmentOK(int picSizeX, ercVariables_t *errorVar ) +{ + if ( errorVar && errorVar->concealment ) + { + int current_segment = errorVar->currSegment-1, j; + + // mark all the Blocks belonging to the segment as OK */ + for ( j = errorVar->segments[current_segment].startMBPos; j <= errorVar->segments[current_segment].endMBPos; j++ ) + { + errorVar->yCondition[MBNum2YBlock (j, 0, picSizeX)] = ERC_BLOCK_OK; + errorVar->yCondition[MBNum2YBlock (j, 1, picSizeX)] = ERC_BLOCK_OK; + errorVar->yCondition[MBNum2YBlock (j, 2, picSizeX)] = ERC_BLOCK_OK; + errorVar->yCondition[MBNum2YBlock (j, 3, picSizeX)] = ERC_BLOCK_OK; + errorVar->uCondition[j] = ERC_BLOCK_OK; + errorVar->vCondition[j] = ERC_BLOCK_OK; + } + errorVar->segments[current_segment].fCorrupted = 0; + } +} + +/*! + ************************************************************************ + * \brief + * Marks the Blocks of the given component (YUV) of the current MB as concealed. + * \param currMBNum + * Selects the segment where this MB number is in. + * \param comp + * Component to mark (0:Y, 1:U, 2:V, <0:All) + * \param picSizeX + * Width of the frame in pixels. 
+ * \param errorVar + * Variables for error detector + ************************************************************************ + */ +void ercMarkCurrMBConcealed( int currMBNum, int comp, int picSizeX, ercVariables_t *errorVar ) +{ + int setAll = 0; + + if ( errorVar && errorVar->concealment ) + { + if (comp < 0) + { + setAll = 1; + comp = 0; + } + + switch (comp) + { + case 0: + errorVar->yCondition[MBNum2YBlock (currMBNum, 0, picSizeX)] = ERC_BLOCK_CONCEALED; + errorVar->yCondition[MBNum2YBlock (currMBNum, 1, picSizeX)] = ERC_BLOCK_CONCEALED; + errorVar->yCondition[MBNum2YBlock (currMBNum, 2, picSizeX)] = ERC_BLOCK_CONCEALED; + errorVar->yCondition[MBNum2YBlock (currMBNum, 3, picSizeX)] = ERC_BLOCK_CONCEALED; + if (!setAll) + break; + case 1: + errorVar->uCondition[currMBNum] = ERC_BLOCK_CONCEALED; + if (!setAll) + break; + case 2: + errorVar->vCondition[currMBNum] = ERC_BLOCK_CONCEALED; + } + } +} diff --git a/Src/h264dec/ldecod/src/erc_do_i.c b/Src/h264dec/ldecod/src/erc_do_i.c new file mode 100644 index 00000000..55d2a38f --- /dev/null +++ b/Src/h264dec/ldecod/src/erc_do_i.c @@ -0,0 +1,544 @@ + +/*! + ************************************************************************************* + * \file + * erc_do_i.c + * + * \brief + * Intra (I) frame error concealment algorithms for decoder + * + * \author + * - Ari Hourunranta <ari.hourunranta@nokia.com> + * - Viktor Varsa <viktor.varsa@nokia.com> + * - Ye-Kui Wang <wyk@ieee.org> + * + ************************************************************************************* + */ + +#include "global.h" +#include "erc_do.h" + +static void concealBlocks ( VideoParameters *p_Vid, int lastColumn, int lastRow, int comp, frame *recfr, int picSizeX, int *condition ); +static void pixMeanInterpolateBlock( VideoParameters *p_Vid, imgpel *src[], imgpel *block, int blockSize, int frameWidth ); + +/*! 
+ ************************************************************************ + * \brief + * The main function for Intra frame concealment. + * Calls "concealBlocks" for each color component (Y,U,V) separately + * \return + * 0, if the concealment was not successful and simple concealment should be used + * 1, otherwise (even if none of the blocks were concealed) + * \param p_Vid + * image encoding parameters for current picture + * \param recfr + * Reconstructed frame buffer + * \param picSizeX + * Width of the frame in pixels + * \param picSizeY + * Height of the frame in pixels + * \param errorVar + * Variables for error concealment + ************************************************************************ + */ +int ercConcealIntraFrame( VideoParameters *p_Vid, frame *recfr, int picSizeX, int picSizeY, ercVariables_t *errorVar ) +{ + int lastColumn = 0, lastRow = 0; + + // if concealment is on + if ( errorVar && errorVar->concealment ) + { + // if there are segments to be concealed + if ( errorVar->nOfCorruptedSegments ) + { + // Y + lastRow = (int) (picSizeY>>3); + lastColumn = (int) (picSizeX>>3); + concealBlocks( p_Vid, lastColumn, lastRow, 0, recfr, picSizeX, errorVar->yCondition ); + + // U (dimensions halved compared to Y) + lastRow = (int) (picSizeY>>4); + lastColumn = (int) (picSizeX>>4); + concealBlocks( p_Vid, lastColumn, lastRow, 1, recfr, picSizeX, errorVar->uCondition ); + + // V ( dimensions equal to U ) + concealBlocks( p_Vid, lastColumn, lastRow, 2, recfr, picSizeX, errorVar->vCondition ); + } + return 1; + } + else + return 0; +} + +/*! 
+ ************************************************************************ + * \brief + * Conceals the MB at position (row, column) using pixels from predBlocks[] + * using pixMeanInterpolateBlock() + * \param p_Vid + * image encoding parameters for current picture + * \param currFrame + * current frame + * \param row + * y coordinate in blocks + * \param column + * x coordinate in blocks + * \param predBlocks[] + * list of neighboring source blocks (numbering 0 to 7, 1 means: use the neighbor) + * \param frameWidth + * width of frame in pixels + * \param mbWidthInBlocks + * 2 for Y, 1 for U/V components + ************************************************************************ + */ +void ercPixConcealIMB(VideoParameters *p_Vid, imgpel *currFrame, int row, int column, int predBlocks[], int frameWidth, int mbWidthInBlocks) +{ + imgpel *src[8]={NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}; + imgpel *currBlock = NULL; + + // collect the reliable neighboring blocks + if (predBlocks[0]) + src[0] = currFrame + (row-mbWidthInBlocks)*frameWidth*8 + (column+mbWidthInBlocks)*8; + if (predBlocks[1]) + src[1] = currFrame + (row-mbWidthInBlocks)*frameWidth*8 + (column-mbWidthInBlocks)*8; + if (predBlocks[2]) + src[2] = currFrame + (row+mbWidthInBlocks)*frameWidth*8 + (column-mbWidthInBlocks)*8; + if (predBlocks[3]) + src[3] = currFrame + (row+mbWidthInBlocks)*frameWidth*8 + (column+mbWidthInBlocks)*8; + if (predBlocks[4]) + src[4] = currFrame + (row-mbWidthInBlocks)*frameWidth*8 + column*8; + if (predBlocks[5]) + src[5] = currFrame + row*frameWidth*8 + (column-mbWidthInBlocks)*8; + if (predBlocks[6]) + src[6] = currFrame + (row+mbWidthInBlocks)*frameWidth*8 + column*8; + if (predBlocks[7]) + src[7] = currFrame + row*frameWidth*8 + (column+mbWidthInBlocks)*8; + + currBlock = currFrame + row*frameWidth*8 + column*8; + pixMeanInterpolateBlock( p_Vid, src, currBlock, mbWidthInBlocks*8, frameWidth ); +} + +/*! 
+ ************************************************************************ + * \brief + * This function checks the neighbors of a Macroblock for usability in + * concealment. First the OK macroblocks are marked, and if there is not + * enough of them, then the CONCEALED ones as well. + * A "1" in the the output array means reliable, a "0" non reliable MB. + * The block order in "predBlocks": + * 1 4 0 + * 5 x 7 + * 2 6 3 + * i.e., corners first. + * \return + * Number of useable neighbor macroblocks for concealment. + * \param predBlocks[] + * Array for indicating the valid neighbor blocks + * \param currRow + * Current block row in the frame + * \param currColumn + * Current block column in the frame + * \param condition + * The block condition (ok, lost) table + * \param maxRow + * Number of block rows in the frame + * \param maxColumn + * Number of block columns in the frame + * \param step + * Number of blocks belonging to a MB, when counting + * in vertical/horizontal direction. (Y:2 U,V:1) + * \param fNoCornerNeigh + * No corner neighbors are considered + ************************************************************************ + */ +int ercCollect8PredBlocks( int predBlocks[], int currRow, int currColumn, int *condition, + int maxRow, int maxColumn, int step, byte fNoCornerNeigh ) +{ + int srcCounter = 0; + int srcCountMin = (fNoCornerNeigh ? 
2 : 4); + int threshold = ERC_BLOCK_OK; + + memset( predBlocks, 0, 8*sizeof(int) ); + + // collect the reliable neighboring blocks + do + { + srcCounter = 0; + // top + if (currRow > 0 && condition[ (currRow-1)*maxColumn + currColumn ] >= threshold ) + { //ERC_BLOCK_OK (3) or ERC_BLOCK_CONCEALED (2) + predBlocks[4] = condition[ (currRow-1)*maxColumn + currColumn ]; + srcCounter++; + } + // bottom + if ( currRow < (maxRow-step) && condition[ (currRow+step)*maxColumn + currColumn ] >= threshold ) + { + predBlocks[6] = condition[ (currRow+step)*maxColumn + currColumn ]; + srcCounter++; + } + + if ( currColumn > 0 ) + { + // left + if ( condition[ currRow*maxColumn + currColumn - 1 ] >= threshold ) + { + predBlocks[5] = condition[ currRow*maxColumn + currColumn - 1 ]; + srcCounter++; + } + + if ( !fNoCornerNeigh ) + { + // top-left + if ( currRow > 0 && condition[ (currRow-1)*maxColumn + currColumn - 1 ] >= threshold ) + { + predBlocks[1] = condition[ (currRow-1)*maxColumn + currColumn - 1 ]; + srcCounter++; + } + // bottom-left + if ( currRow < (maxRow-step) && condition[ (currRow+step)*maxColumn + currColumn - 1 ] >= threshold ) + { + predBlocks[2] = condition[ (currRow+step)*maxColumn + currColumn - 1 ]; + srcCounter++; + } + } + } + + if ( currColumn < (maxColumn-step) ) + { + // right + if ( condition[ currRow*maxColumn+currColumn + step ] >= threshold ) + { + predBlocks[7] = condition[ currRow*maxColumn+currColumn + step ]; + srcCounter++; + } + + if ( !fNoCornerNeigh ) + { + // top-right + if ( currRow > 0 && condition[ (currRow-1)*maxColumn + currColumn + step ] >= threshold ) + { + predBlocks[0] = condition[ (currRow-1)*maxColumn + currColumn + step ]; + srcCounter++; + } + // bottom-right + if ( currRow < (maxRow-step) && condition[ (currRow+step)*maxColumn + currColumn + step ] >= threshold ) + { + predBlocks[3] = condition[ (currRow+step)*maxColumn + currColumn + step ]; + srcCounter++; + } + } + } + // prepare for the next round + threshold--; + if 
(threshold < ERC_BLOCK_CONCEALED) + break; + } while ( srcCounter < srcCountMin); + + return srcCounter; +} + +/*! + ************************************************************************ + * \brief + * collects prediction blocks only from the current column + * \return + * Number of usable neighbour Macroblocks for concealment. + * \param predBlocks[] + * Array for indicating the valid neighbor blocks + * \param currRow + * Current block row in the frame + * \param currColumn + * Current block column in the frame + * \param condition + * The block condition (ok, lost) table + * \param maxRow + * Number of block rows in the frame + * \param maxColumn + * Number of block columns in the frame + * \param step + * Number of blocks belonging to a MB, when counting + * in vertical/horizontal direction. (Y:2 U,V:1) + ************************************************************************ + */ +int ercCollectColumnBlocks( int predBlocks[], int currRow, int currColumn, int *condition, int maxRow, int maxColumn, int step ) +{ + int srcCounter = 0, threshold = ERC_BLOCK_CORRUPTED; + + memset( predBlocks, 0, 8*sizeof(int) ); + + // in this case, row > 0 and row < 17 + if ( condition[ (currRow-1)*maxColumn + currColumn ] > threshold ) + { + predBlocks[4] = 1; + srcCounter++; + } + if ( condition[ (currRow+step)*maxColumn + currColumn ] > threshold ) + { + predBlocks[6] = 1; + srcCounter++; + } + + return srcCounter; +} + +/*! + ************************************************************************ + * \brief + * Core for the Intra blocks concealment. + * It is called for each color component (Y,U,V) separately + * Finds the corrupted blocks and calls pixel interpolation functions + * to correct them, one block at a time. + * Scanning is done vertically and each corrupted column is corrected + * bi-directionally, i.e., first block, last block, first block+1, last block -1 ... 
+ * \param p_Vid + * image encoding parameters for current picture + * \param lastColumn + * Number of block columns in the frame + * \param lastRow + * Number of block rows in the frame + * \param comp + * color component + * \param recfr + * Reconstructed frame buffer + * \param picSizeX + * Width of the frame in pixels + * \param condition + * The block condition (ok, lost) table + ************************************************************************ + */ +static void concealBlocks( VideoParameters *p_Vid, int lastColumn, int lastRow, int comp, frame *recfr, int picSizeX, int *condition ) +{ + int row, column, srcCounter = 0, thr = ERC_BLOCK_CORRUPTED, + lastCorruptedRow = -1, firstCorruptedRow = -1, currRow = 0, + areaHeight = 0, i = 0, smoothColumn = 0; + int predBlocks[8], step = 1; + + // in the Y component do the concealment MB-wise (not block-wise): + // this is useful if only whole MBs can be damaged or lost + if ( comp == 0 ) + step = 2; + else + step = 1; + + for ( column = 0; column < lastColumn; column += step ) + { + for ( row = 0; row < lastRow; row += step ) + { + if ( condition[row*lastColumn+column] <= thr ) + { + firstCorruptedRow = row; + // find the last row which has corrupted blocks (in same continuous area) + for ( lastCorruptedRow = row+step; lastCorruptedRow < lastRow; lastCorruptedRow += step ) + { + // check blocks in the current column + if ( condition[ lastCorruptedRow*lastColumn + column ] > thr ) + { + // current one is already OK, so the last was the previous one + lastCorruptedRow -= step; + break; + } + } + if ( lastCorruptedRow >= lastRow ) + { + // correct only from above + lastCorruptedRow = lastRow-step; + for ( currRow = firstCorruptedRow; currRow < lastRow; currRow += step ) + { + srcCounter = ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 ); + + switch( comp ) + { + case 0 : + ercPixConcealIMB( p_Vid, recfr->yptr, currRow, column, predBlocks, picSizeX, 2 ); + break; + case 1 
: + ercPixConcealIMB( p_Vid, recfr->uptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + case 2 : + ercPixConcealIMB( p_Vid, recfr->vptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + } + + if ( comp == 0 ) + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + 1] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn + 1] = ERC_BLOCK_CONCEALED; + } + else + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + } + + } + row = lastRow; + } + else if ( firstCorruptedRow == 0 ) + { + // correct only from below + for ( currRow = lastCorruptedRow; currRow >= 0; currRow -= step ) + { + srcCounter = ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 ); + + switch( comp ) + { + case 0 : + ercPixConcealIMB( p_Vid, recfr->yptr, currRow, column, predBlocks, picSizeX, 2 ); + break; + case 1 : + ercPixConcealIMB( p_Vid, recfr->uptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + case 2 : + ercPixConcealIMB( p_Vid, recfr->vptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + } + + if ( comp == 0 ) + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + 1] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn + 1] = ERC_BLOCK_CONCEALED; + } + else + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + } + + } + + row = lastCorruptedRow+step; + } + else + { + // correct bi-directionally + + row = lastCorruptedRow+step; + areaHeight = lastCorruptedRow-firstCorruptedRow+step; + + // Conceal the corrupted area switching between the up and the bottom rows + for ( i = 0; i < areaHeight; i += step ) + { + if ( i % 2 ) + { + currRow = lastCorruptedRow; + lastCorruptedRow -= 
step; + } + else + { + currRow = firstCorruptedRow; + firstCorruptedRow += step; + } + + if (smoothColumn > 0) + { + srcCounter = ercCollectColumnBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step ); + } + else + { + srcCounter = ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 ); + } + + switch( comp ) + { + case 0 : + ercPixConcealIMB( p_Vid, recfr->yptr, currRow, column, predBlocks, picSizeX, 2 ); + break; + + case 1 : + ercPixConcealIMB( p_Vid, recfr->uptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + + case 2 : + ercPixConcealIMB( p_Vid, recfr->vptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + } + + if ( comp == 0 ) + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + 1] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn + 1] = ERC_BLOCK_CONCEALED; + } + else + { + condition[ currRow*lastColumn+column ] = ERC_BLOCK_CONCEALED; + } + } + } + + lastCorruptedRow = -1; + firstCorruptedRow = -1; + + } + } + } +} + +/*! 
+ ************************************************************************ + * \brief + * Does the actual pixel based interpolation for block[] + * using weighted average + * \param p_Vid + * image encoding parameters for current picture + * \param src[] + * pointers to neighboring source blocks + * \param block + * destination block + * \param blockSize + * 16 for Y, 8 for U/V components + * \param frameWidth + * Width of the frame in pixels + ************************************************************************ + */ +static void pixMeanInterpolateBlock( VideoParameters *p_Vid, imgpel *src[], imgpel *block, int blockSize, int frameWidth ) +{ + int row, column, k, tmp, srcCounter = 0, weight = 0, bmax = blockSize - 1; + + k = 0; + for ( row = 0; row < blockSize; row++ ) + { + for ( column = 0; column < blockSize; column++ ) + { + tmp = 0; + srcCounter = 0; + // above + if ( src[4] != NULL ) + { + weight = blockSize-row; + tmp += weight * (*(src[4]+bmax*frameWidth+column)); + srcCounter += weight; + } + // left + if ( src[5] != NULL ) + { + weight = blockSize-column; + tmp += weight * (*(src[5]+row*frameWidth+bmax)); + srcCounter += weight; + } + // below + if ( src[6] != NULL ) + { + weight = row+1; + tmp += weight * (*(src[6]+column)); + srcCounter += weight; + } + // right + if ( src[7] != NULL ) + { + weight = column+1; + tmp += weight * (*(src[7]+row*frameWidth)); + srcCounter += weight; + } + + if ( srcCounter > 0 ) + block[ k + column ] = (byte)(tmp/srcCounter); + else + block[ k + column ] = blockSize == 8 ? p_Vid->dc_pred_value_comp[1] : p_Vid->dc_pred_value_comp[0]; + } + k += frameWidth; + } +} diff --git a/Src/h264dec/ldecod/src/erc_do_p.c b/Src/h264dec/ldecod/src/erc_do_p.c new file mode 100644 index 00000000..69727d2b --- /dev/null +++ b/Src/h264dec/ldecod/src/erc_do_p.c @@ -0,0 +1,1742 @@ + +/*! 
+ ************************************************************************************* + * \file + * erc_do_p.c + * + * \brief + * Inter (P) frame error concealment algorithms for decoder + * + * \author + * - Viktor Varsa <viktor.varsa@nokia.com> + * - Ye-Kui Wang <wyk@ieee.org> + * - Jill Boyce <jill.boyce@thomson.net> + * - Saurav K Bandyopadhyay <saurav@ieee.org> + * - Zhenyu Wu <Zhenyu.Wu@thomson.net> + * - Purvin Pandit <Purvin.Pandit@thomson.net> + * + ************************************************************************************* + */ + +#include "global.h" +#include "mbuffer.h" +#include "memalloc.h" +#include "erc_do.h" +#include "image.h" +#include "mc_prediction.h" +#include "macroblock.h" + + +// static function declarations +static int concealByCopy(frame *recfr, int currMBNum, objectBuffer_t *object_list, int picSizeX); +static int concealByTrial(frame *recfr, imgpel *predMB, + int currMBNum, objectBuffer_t *object_list, int predBlocks[], + int picSizeX, int picSizeY, int *yCondition); +static int edgeDistortion (int predBlocks[], int currYBlockNum, imgpel *predMB, + imgpel *recY, int picSizeX, int regionSize); +static void copyBetweenFrames (frame *recfr, int currYBlockNum, int picSizeX, int regionSize); +static void buildPredRegionYUV(VideoParameters *p_Vid, const short *mv, int x, int y, imgpel *predMB); + +// picture error concealment +static void buildPredblockRegionYUV(VideoParameters *p_Vid, const short *mv, + int x, int y, imgpel *predMB, int list); +static void CopyImgData(imgpel **inputY, imgpel ***inputUV, imgpel **outputY, imgpel ***outputUV, + int img_width, int img_height, int img_width_cr, int img_height_cr); + +static void copyPredMB (int currYBlockNum, imgpel *predMB, frame *recfr, + int picSizeX, int regionSize); +static void add_node ( VideoParameters *p_Vid, struct concealment_node *ptr ); +static void delete_node( VideoParameters *p_Vid, struct concealment_node *ptr ); + +static const int uv_div[2][4] = {{0, 1, 1, 0}, {0, 
1, 0, 0}}; //[x/y][yuv_format] + +/*! + ************************************************************************ + * \brief + * The main function for Inter (P) frame concealment. + * \return + * 0, if the concealment was not successful and simple concealment should be used + * 1, otherwise (even if none of the blocks were concealed) + * \param recfr + * Reconstructed frame buffer + * \param object_list + * Motion info for all MBs in the frame + * \param picSizeX + * Width of the frame in pixels + * \param picSizeY + * Height of the frame in pixels + * \param errorVar + * Variables for error concealment + * \param chroma_format_idc + * Chroma format IDC + ************************************************************************ + */ +int ercConcealInterFrame(frame *recfr, objectBuffer_t *object_list, + int picSizeX, int picSizeY, ercVariables_t *errorVar, int chroma_format_idc ) +{ + VideoParameters *p_Vid = recfr->p_Vid; + int lastColumn = 0, lastRow = 0, predBlocks[8]; + int lastCorruptedRow = -1, firstCorruptedRow = -1; + int currRow = 0, row, column, columnInd, areaHeight = 0, i = 0; + imgpel *predMB; + + /* if concealment is on */ + if ( errorVar && errorVar->concealment ) + { + /* if there are segments to be concealed */ + if ( errorVar->nOfCorruptedSegments ) + { + if (chroma_format_idc != YUV400) + predMB = (imgpel *) malloc ( (256 + (p_Vid->mb_cr_size_x * p_Vid->mb_cr_size_y)*2) * sizeof (imgpel)); + else + predMB = (imgpel *) malloc(256 * sizeof (imgpel)); + + if ( predMB == NULL ) no_mem_exit("ercConcealInterFrame: predMB"); + + lastRow = (int) (picSizeY>>4); + lastColumn = (int) (picSizeX>>4); + + for ( columnInd = 0; columnInd < lastColumn; columnInd ++) + { + column = ((columnInd%2) ? 
(lastColumn - columnInd/2 -1) : (columnInd/2)); + + for ( row = 0; row < lastRow; row++) + { + + if ( errorVar->yCondition[MBxy2YBlock(column, row, 0, picSizeX)] <= ERC_BLOCK_CORRUPTED ) + { // ERC_BLOCK_CORRUPTED (1) or ERC_BLOCK_EMPTY (0) + firstCorruptedRow = row; + /* find the last row which has corrupted blocks (in same continuous area) */ + for ( lastCorruptedRow = row+1; lastCorruptedRow < lastRow; lastCorruptedRow++) + { + /* check blocks in the current column */ + if (errorVar->yCondition[MBxy2YBlock(column, lastCorruptedRow, 0, picSizeX)] > ERC_BLOCK_CORRUPTED) + { + /* current one is already OK, so the last was the previous one */ + lastCorruptedRow --; + break; + } + } + if ( lastCorruptedRow >= lastRow ) + { + /* correct only from above */ + lastCorruptedRow = lastRow-1; + for ( currRow = firstCorruptedRow; currRow < lastRow; currRow++ ) + { + + ercCollect8PredBlocks (predBlocks, (currRow<<1), (column<<1), + errorVar->yCondition, (lastRow<<1), (lastColumn<<1), 2, 0); + + if(p_Vid->erc_mvperMB >= MVPERMB_THR) + concealByTrial(recfr, predMB, + currRow*lastColumn+column, object_list, predBlocks, + picSizeX, picSizeY, + errorVar->yCondition); + else + concealByCopy(recfr, currRow*lastColumn+column, + object_list, picSizeX); + + ercMarkCurrMBConcealed (currRow*lastColumn+column, -1, picSizeX, errorVar); + } + row = lastRow; + } + else if ( firstCorruptedRow == 0 ) + { + /* correct only from below */ + for ( currRow = lastCorruptedRow; currRow >= 0; currRow-- ) + { + + ercCollect8PredBlocks (predBlocks, (currRow<<1), (column<<1), + errorVar->yCondition, (lastRow<<1), (lastColumn<<1), 2, 0); + + if(p_Vid->erc_mvperMB >= MVPERMB_THR) + concealByTrial(recfr, predMB, + currRow*lastColumn+column, object_list, predBlocks, + picSizeX, picSizeY, + errorVar->yCondition); + else + concealByCopy(recfr, currRow*lastColumn+column, + object_list, picSizeX); + + ercMarkCurrMBConcealed (currRow*lastColumn+column, -1, picSizeX, errorVar); + } + + row = lastCorruptedRow+1; + 
} + else + { + /* correct bi-directionally */ + + row = lastCorruptedRow+1; + + areaHeight = lastCorruptedRow-firstCorruptedRow+1; + + /* + * Conceal the corrupted area switching between the up and the bottom rows + */ + for ( i = 0; i < areaHeight; i++) + { + if ( i % 2 ) + { + currRow = lastCorruptedRow; + lastCorruptedRow --; + } + else + { + currRow = firstCorruptedRow; + firstCorruptedRow ++; + } + + ercCollect8PredBlocks (predBlocks, (currRow<<1), (column<<1), + errorVar->yCondition, (lastRow<<1), (lastColumn<<1), 2, 0); + + if(p_Vid->erc_mvperMB >= MVPERMB_THR) + concealByTrial(recfr, predMB, + currRow*lastColumn+column, object_list, predBlocks, + picSizeX, picSizeY, + errorVar->yCondition); + else + concealByCopy(recfr, currRow*lastColumn+column, + object_list, picSizeX); + + ercMarkCurrMBConcealed (currRow*lastColumn+column, -1, picSizeX, errorVar); + + } + } + lastCorruptedRow = -1; + firstCorruptedRow = -1; + } + } + } + + free(predMB); + } + return 1; + } + else + return 0; +} + +/*! + ************************************************************************ + * \brief + * It conceals a given MB by simply copying the pixel area from the reference image + * that is at the same location as the macroblock in the current image. This correcponds + * to COPY MBs. + * \return + * Always zero (0). 
+ * \param recfr + * Reconstructed frame buffer + * \param currMBNum + * current MB index + * \param object_list + * Motion info for all MBs in the frame + * \param picSizeX + * Width of the frame in pixels + ************************************************************************ + */ +static int concealByCopy(frame *recfr, int currMBNum, + objectBuffer_t *object_list, int picSizeX) +{ + objectBuffer_t *currRegion; + + currRegion = object_list+(currMBNum<<2); + currRegion->regionMode = REGMODE_INTER_COPY; + + currRegion->xMin = (xPosMB(currMBNum,picSizeX)<<4); + currRegion->yMin = (yPosMB(currMBNum,picSizeX)<<4); + + copyBetweenFrames (recfr, MBNum2YBlock(currMBNum,0,picSizeX), picSizeX, 16); + + return 0; +} + +/*! + ************************************************************************ + * \brief + * Copies the co-located pixel values from the reference to the current frame. + * Used by concealByCopy + * \param recfr + * Reconstructed frame buffer + * \param currYBlockNum + * index of the block (8x8) in the Y plane + * \param picSizeX + * Width of the frame in pixels + * \param regionSize + * can be 16 or 8 to tell the dimension of the region to copy + ************************************************************************ + */ +static void copyBetweenFrames (frame *recfr, int currYBlockNum, int picSizeX, int regionSize) +{ + VideoParameters *p_Vid = recfr->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + int j, k, location, xmin, ymin; + StorablePicture* refPic = p_Vid->listX[0][0]; + + /* set the position of the region to be copied */ + xmin = (xPosYBlock(currYBlockNum,picSizeX)<<3); + ymin = (yPosYBlock(currYBlockNum,picSizeX)<<3); + + for (j = ymin; j < ymin + regionSize; j++) + for (k = xmin; k < xmin + regionSize; k++) + { + location = j * picSizeX + k; +//th recfr->yptr[location] = dec_picture->imgY[j][k]; + recfr->yptr[location] = refPic->imgY->img[j][k]; + } + + for (j = ymin >> uv_div[1][dec_picture->chroma_format_idc]; j < (ymin + 
regionSize) >> uv_div[1][dec_picture->chroma_format_idc]; j++) + for (k = xmin >> uv_div[0][dec_picture->chroma_format_idc]; k < (xmin + regionSize) >> uv_div[0][dec_picture->chroma_format_idc]; k++) + { +// location = j * picSizeX / 2 + k; + location = ((j * picSizeX) >> uv_div[0][dec_picture->chroma_format_idc]) + k; + +//th recfr->uptr[location] = dec_picture->imgUV[0][j][k]; +//th recfr->vptr[location] = dec_picture->imgUV[1][j][k]; + recfr->uptr[location] = refPic->imgUV[0]->img[j][k]; + recfr->vptr[location] = refPic->imgUV[1]->img[j][k]; + } +} + +/*! + ************************************************************************ + * \brief + * It conceals a given MB by using the motion vectors of one reliable neighbor. That MV of a + * neighbor is selected wich gives the lowest pixel difference at the edges of the MB + * (see function edgeDistortion). This corresponds to a spatial smoothness criteria. + * \return + * Always zero (0). + * \param recfr + * Reconstructed frame buffer + * \param predMB + * memory area for storing temporary pixel values for a macroblock + * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 + * \param currMBNum + * current MB index + * \param object_list + * array of region structures storing region mode and mv for each region + * \param predBlocks + * status array of the neighboring blocks (if they are OK, concealed or lost) + * \param picSizeX + * Width of the frame in pixels + * \param picSizeY + * Height of the frame in pixels + * \param yCondition + * array for conditions of Y blocks from ercVariables_t + ************************************************************************ + */ +static int concealByTrial(frame *recfr, imgpel *predMB, + int currMBNum, objectBuffer_t *object_list, int predBlocks[], + int picSizeX, int picSizeY, int *yCondition) +{ + VideoParameters *p_Vid = recfr->p_Vid; + int predMBNum = 0, numMBPerLine, + compSplit1 = 0, compSplit2 = 0, compLeft = 1, comp = 0, compPred, order = 1, + 
fInterNeighborExists, numIntraNeighbours, + fZeroMotionChecked, predSplitted = 0, + threshold = ERC_BLOCK_OK, + minDist, currDist, i, k, bestDir; + int regionSize; + objectBuffer_t *currRegion; + short mvBest[3] = {0, 0, 0}, mvPred[3] = {0, 0, 0}, *mvptr; + + numMBPerLine = (int) (picSizeX>>4); + + p_Vid->current_mb_nr = currMBNum; + + comp = 0; + regionSize = 16; + + do + { /* 4 blocks loop */ + + currRegion = object_list+(currMBNum<<2)+comp; + + /* set the position of the region to be concealed */ + + currRegion->xMin = (xPosYBlock(MBNum2YBlock(currMBNum,comp,picSizeX),picSizeX)<<3); + currRegion->yMin = (yPosYBlock(MBNum2YBlock(currMBNum,comp,picSizeX),picSizeX)<<3); + + do + { /* reliability loop */ + + minDist = 0; + fInterNeighborExists = 0; + numIntraNeighbours = 0; + fZeroMotionChecked = 0; + + /* loop the 4 neighbours */ + for (i = 4; i < 8; i++) + { + + /* if reliable, try it */ + if (predBlocks[i] >= threshold) + { + switch (i) + { + case 4: + predMBNum = currMBNum-numMBPerLine; + compSplit1 = 2; + compSplit2 = 3; + break; + + case 5: + predMBNum = currMBNum-1; + compSplit1 = 1; + compSplit2 = 3; + break; + + case 6: + predMBNum = currMBNum+numMBPerLine; + compSplit1 = 0; + compSplit2 = 1; + break; + + case 7: + predMBNum = currMBNum+1; + compSplit1 = 0; + compSplit2 = 2; + break; + } + + /* try the concealment with the Motion Info of the current neighbour + only try if the neighbour is not Intra */ + if (isBlock(object_list,predMBNum,compSplit1,INTRA) || + isBlock(object_list,predMBNum,compSplit2,INTRA)) + { + numIntraNeighbours++; + } + else + { + /* if neighbour MB is splitted, try both neighbour blocks */ + for (predSplitted = isSplitted(object_list, predMBNum), + compPred = compSplit1; + predSplitted >= 0; + compPred = compSplit2, + predSplitted -= ((compSplit1 == compSplit2) ? 2 : 1)) + { + + /* if Zero Motion Block, do the copying. 
This option is tried only once */ + if (isBlock(object_list, predMBNum, compPred, INTER_COPY)) + { + + if (fZeroMotionChecked) + { + continue; + } + else + { + fZeroMotionChecked = 1; + + mvPred[0] = mvPred[1] = 0; + mvPred[2] = 0; + + buildPredRegionYUV(p_Vid->erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB); + } + } + /* build motion using the neighbour's Motion Parameters */ + else if (isBlock(object_list,predMBNum,compPred,INTRA)) + { + continue; + } + else + { + mvptr = getParam(object_list, predMBNum, compPred, mv); + + mvPred[0] = mvptr[0]; + mvPred[1] = mvptr[1]; + mvPred[2] = mvptr[2]; + + buildPredRegionYUV(p_Vid->erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB); + } + + /* measure absolute boundary pixel difference */ + currDist = edgeDistortion(predBlocks, + MBNum2YBlock(currMBNum,comp,picSizeX), + predMB, recfr->yptr, picSizeX, regionSize); + + /* if so far best -> store the pixels as the best concealment */ + if (currDist < minDist || !fInterNeighborExists) + { + + minDist = currDist; + bestDir = i; + + for (k=0;k<3;k++) + mvBest[k] = mvPred[k]; + + currRegion->regionMode = + (isBlock(object_list, predMBNum, compPred, INTER_COPY)) ? + ((regionSize == 16) ? REGMODE_INTER_COPY : REGMODE_INTER_COPY_8x8) : + ((regionSize == 16) ? 
REGMODE_INTER_PRED : REGMODE_INTER_PRED_8x8); + + copyPredMB(MBNum2YBlock(currMBNum,comp,picSizeX), predMB, recfr, + picSizeX, regionSize); + } + + fInterNeighborExists = 1; + } + } + } + } + + threshold--; + + } while ((threshold >= ERC_BLOCK_CONCEALED) && (fInterNeighborExists == 0)); + + /* always try zero motion */ + if (!fZeroMotionChecked) + { + mvPred[0] = mvPred[1] = 0; + mvPred[2] = 0; + + buildPredRegionYUV(p_Vid->erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB); + + currDist = edgeDistortion(predBlocks, + MBNum2YBlock(currMBNum,comp,picSizeX), + predMB, recfr->yptr, picSizeX, regionSize); + + if (currDist < minDist || !fInterNeighborExists) + { + + minDist = currDist; + for (k=0;k<3;k++) + mvBest[k] = mvPred[k]; + + currRegion->regionMode = + ((regionSize == 16) ? REGMODE_INTER_COPY : REGMODE_INTER_COPY_8x8); + + copyPredMB(MBNum2YBlock(currMBNum,comp,picSizeX), predMB, recfr, + picSizeX, regionSize); + } + } + + for (i=0; i<3; i++) + currRegion->mv[i] = mvBest[i]; + + yCondition[MBNum2YBlock(currMBNum,comp,picSizeX)] = ERC_BLOCK_CONCEALED; + comp = (comp+order+4)%4; + compLeft--; + + } while (compLeft); + + return 0; +} + +/*! +************************************************************************ +* \brief +* Builds the motion prediction pixels from the given location (in 1/4 pixel units) +* of the reference frame. It not only copies the pixel values but builds the interpolation +* when the pixel positions to be copied from is not full pixel (any 1/4 pixel position). +* It copies the resulting pixel vlaues into predMB. 
+* \param p_Vid +* The pointer of img_par struture of current frame +* \param mv +* The pointer of the predicted MV of the current (being concealed) MB +* \param x +* The x-coordinate of the above-left corner pixel of the current MB +* \param y +* The y-coordinate of the above-left corner pixel of the current MB +* \param predMB +* memory area for storing temporary pixel values for a macroblock +* the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 +************************************************************************ +*/ +static void buildPredRegionYUV(VideoParameters *p_Vid, const short *mv, int x, int y, imgpel *predMB) +{ + int i=0, j=0, ii=0, jj=0,i1=0,j1=0,j4=0,i4=0; + int jf=0; + int uv; + int vec1_x=0,vec1_y=0; + int ioff,joff; + imgpel *pMB = predMB; + + StorablePicture *dec_picture = p_Vid->dec_picture; + int ii0,jj0,ii1,jj1,if1,jf1,if0,jf0; + int mv_mul; + + //FRExt + int f1_x, f1_y, f2_x, f2_y, f3, f4, ifx; + int b8, b4; + int yuv = dec_picture->chroma_format_idc - 1; + + int ref_frame = imax (mv[2], 0); // !!KS: quick fix, we sometimes seem to get negative ref_pic here, so restrict to zero and above + int mb_nr = p_Vid->current_mb_nr; + + Macroblock *currMB = &p_Vid->mb_data[mb_nr]; // intialization code deleted, see below, StW + Slice *currSlice = currMB->p_Slice; + + h264_imgpel_macroblock_t tmp_block; + + /* Update coordinates of the current concealed macroblock */ + p_Vid->mb_x = x/MB_BLOCK_SIZE; + p_Vid->mb_y = y/MB_BLOCK_SIZE; + p_Vid->block_y = p_Vid->mb_y * BLOCK_SIZE; + p_Vid->pix_c_y = p_Vid->mb_y * p_Vid->mb_cr_size_y; + p_Vid->block_x = p_Vid->mb_x * BLOCK_SIZE; + p_Vid->pix_c_x = p_Vid->mb_x * p_Vid->mb_cr_size_x; + + mv_mul=4; + + // luma ******************************************************* + + for(j=0;j<MB_BLOCK_SIZE/BLOCK_SIZE;j++) + { + joff=j*4; + j4=p_Vid->block_y+j; + for(i=0;i<MB_BLOCK_SIZE/BLOCK_SIZE;i++) + { + ioff=i*4; + i4=p_Vid->block_x+i; + + vec1_x = i4*4*mv_mul + mv[0]; + vec1_y = j4*4*mv_mul 
+ mv[1]; + + get_block_luma(currMB, PLANE_Y, p_Vid->listX[0][ref_frame], i4, j4, mv, BLOCK_SIZE, BLOCK_SIZE, tmp_block); + + for(ii=0;ii<BLOCK_SIZE;ii++) + for(jj=0;jj<MB_BLOCK_SIZE/BLOCK_SIZE;jj++) + currSlice->mb_pred[LumaComp][jj+joff][ii+ioff]=tmp_block[jj][ii]; + } + } + + + for (j = 0; j < 16; j++) + { + for (i = 0; i < 16; i++) + { + pMB[j*16+i] = currSlice->mb_pred[LumaComp][j][i]; + } + } + pMB += 256; + + if (dec_picture->chroma_format_idc != YUV400) + { + // chroma ******************************************************* + f1_x = 64/p_Vid->mb_cr_size_x; + f2_x=f1_x-1; + + f1_y = 64/p_Vid->mb_cr_size_y; + f2_y=f1_y-1; + + f3=f1_x*f1_y; + f4=f3>>1; + + for(uv=0;uv<2;uv++) + { + for (b8=0;b8<(p_Vid->num_uv_blocks);b8++) + { + for(b4=0;b4<4;b4++) + { + joff = subblk_offset_y[yuv][b8][b4]; + j4=p_Vid->pix_c_y+joff; + ioff = subblk_offset_x[yuv][b8][b4]; + i4=p_Vid->pix_c_x+ioff; + + for(jj=0;jj<4;jj++) + { + jf=(j4+jj)/(p_Vid->mb_cr_size_y/4); // jf = Subblock_y-coordinate + for(ii=0;ii<4;ii++) + { + ifx=(i4+ii)/(p_Vid->mb_cr_size_x/4); // ifx = Subblock_x-coordinate + + i1=(i4+ii)*f1_x + mv[0]; + j1=(j4+jj)*f1_y + mv[1]; + + ii0=iClip3 (0, dec_picture->size_x_cr-1, i1/f1_x); + jj0=iClip3 (0, dec_picture->size_y_cr-1, j1/f1_y); + ii1=iClip3 (0, dec_picture->size_x_cr-1, ((i1+f2_x)/f1_x)); + jj1=iClip3 (0, dec_picture->size_y_cr-1, ((j1+f2_y)/f1_y)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + currSlice->mb_pred[uv + 1][jj+joff][ii+ioff]=(if0*jf0*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj0][ii0]+ + if1*jf0*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj0][ii1]+ + if0*jf1*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj1][ii0]+ + if1*jf1*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj1][ii1]+f4)/f3; + } + } + } + } + + for (j = 0; j < 8; j++) + { + for (i = 0; i < 8; i++) + { + pMB[j*8+i] = currSlice->mb_pred[uv + 1][j][i]; + } + } + pMB += 64; + + } + } +} +/*! 
+ ************************************************************************ + * \brief + * Copies pixel values between a YUV frame and the temporary pixel value storage place. This is + * used to save some pixel values temporarily before overwriting it, or to copy back to a given + * location in a frame the saved pixel values. + * \param currYBlockNum + * index of the block (8x8) in the Y plane + * \param predMB + * memory area where the temporary pixel values are stored + * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 + * \param recfr + * pointer to a YUV frame + * \param picSizeX + * picture width in pixels + * \param regionSize + * can be 16 or 8 to tell the dimension of the region to copy + ************************************************************************ + */ +static void copyPredMB (int currYBlockNum, imgpel *predMB, frame *recfr, + int picSizeX, int regionSize) +{ + VideoParameters *p_Vid = recfr->p_Vid; +StorablePicture *dec_picture = p_Vid->dec_picture; + int j, k, xmin, ymin, xmax, ymax; + int locationTmp, locationPred; + int uv_x = uv_div[0][dec_picture->chroma_format_idc]; + int uv_y = uv_div[1][dec_picture->chroma_format_idc]; + + xmin = (xPosYBlock(currYBlockNum,picSizeX)<<3); + ymin = (yPosYBlock(currYBlockNum,picSizeX)<<3); + xmax = xmin + regionSize -1; + ymax = ymin + regionSize -1; + + for (j = ymin; j <= ymax; j++) + { + for (k = xmin; k <= xmax; k++) + { + locationPred = j * picSizeX + k; + locationTmp = (j-ymin) * 16 + (k-xmin); + dec_picture->imgY->img[j][k] = predMB[locationTmp]; + } + } + + if (dec_picture->chroma_format_idc != YUV400) + { + for (j = (ymin>>uv_y); j <= (ymax>>uv_y); j++) + { + for (k = (xmin>>uv_x); k <= (xmax>>uv_x); k++) + { + locationPred = ((j * picSizeX) >> uv_x) + k; + locationTmp = (j-(ymin>>uv_y)) * p_Vid->mb_cr_size_x + (k-(xmin>>1)) + 256; + dec_picture->imgUV[0]->img[j][k] = predMB[locationTmp]; + + locationTmp += 64; + + dec_picture->imgUV[1]->img[j][k] = 
predMB[locationTmp]; + } + } + } +} + +/*! + ************************************************************************ + * \brief + * Calculates a weighted pixel difference between edge Y pixels of the macroblock stored in predMB + * and the pixels in the given Y plane of a frame (recY) that would become neighbor pixels if + * predMB was placed at currYBlockNum block position into the frame. This "edge distortion" value + * is used to determine how well the given macroblock in predMB would fit into the frame when + * considering spatial smoothness. If there are correctly received neighbor blocks (status stored + * in predBlocks) only they are used in calculating the edge distorion; otherwise also the already + * concealed neighbor blocks can also be used. + * \return + * The calculated weighted pixel difference at the edges of the MB. + * \param predBlocks + * status array of the neighboring blocks (if they are OK, concealed or lost) + * \param currYBlockNum + * index of the block (8x8) in the Y plane + * \param predMB + * memory area where the temporary pixel values are stored + * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 + * \param recY + * pointer to a Y plane of a YUV frame + * \param picSizeX + * picture width in pixels + * \param regionSize + * can be 16 or 8 to tell the dimension of the region to copy + ************************************************************************ + */ +static int edgeDistortion (int predBlocks[], int currYBlockNum, imgpel *predMB, + imgpel *recY, int picSizeX, int regionSize) +{ + int i, j, distortion, numOfPredBlocks, threshold = ERC_BLOCK_OK; + imgpel *currBlock = NULL, *neighbor = NULL; + int currBlockOffset = 0; + + currBlock = recY + (yPosYBlock(currYBlockNum,picSizeX)<<3)*picSizeX + (xPosYBlock(currYBlockNum,picSizeX)<<3); + + do + { + + distortion = 0; numOfPredBlocks = 0; + + // loop the 4 neighbors + for (j = 4; j < 8; j++) + { + /* if reliable, count boundary pixel difference */ + if 
(predBlocks[j] >= threshold) + { + + switch (j) + { + case 4: + neighbor = currBlock - picSizeX; + for ( i = 0; i < regionSize; i++ ) + { + distortion += iabs((int)(predMB[i] - neighbor[i])); + } + break; + case 5: + neighbor = currBlock - 1; + for ( i = 0; i < regionSize; i++ ) + { + distortion += iabs((int)(predMB[i*16] - neighbor[i*picSizeX])); + } + break; + case 6: + neighbor = currBlock + regionSize*picSizeX; + currBlockOffset = (regionSize-1)*16; + for ( i = 0; i < regionSize; i++ ) + { + distortion += iabs((int)(predMB[i+currBlockOffset] - neighbor[i])); + } + break; + case 7: + neighbor = currBlock + regionSize; + currBlockOffset = regionSize-1; + for ( i = 0; i < regionSize; i++ ) + { + distortion += iabs((int)(predMB[i*16+currBlockOffset] - neighbor[i*picSizeX])); + } + break; + } + + numOfPredBlocks++; + } + } + + threshold--; + if (threshold < ERC_BLOCK_CONCEALED) + break; + } while (numOfPredBlocks == 0); + + if(numOfPredBlocks == 0) + { + return 0; + // assert (numOfPredBlocks != 0); !!!KS hmm, trying to continue... + } + return (distortion/numOfPredBlocks); +} + +// picture error concealment below + +/*! +************************************************************************ +* \brief +* The motion prediction pixels are calculated from the given location (in +* 1/4 pixel units) of the referenced frame. It copies the sub block from the +* corresponding reference to the frame to be concealed. 
+* +************************************************************************* +*/ +static void buildPredblockRegionYUV(VideoParameters *p_Vid, const short *mv, + int x, int y, imgpel *predMB, int list) +{ + int i=0,j=0,ii=0,jj=0,i1=0,j1=0,j4=0,i4=0; + int jf=0; + int uv; + int vec1_x=0,vec1_y=0; + int ioff,joff; + + StorablePicture *dec_picture = p_Vid->dec_picture; + imgpel *pMB = predMB; + + int ii0,jj0,ii1,jj1,if1,jf1,if0,jf0; + int mv_mul; + + //FRExt + int f1_x, f1_y, f2_x, f2_y, f3, f4, ifx; + int yuv = dec_picture->chroma_format_idc - 1; + + int ref_frame = mv[2]; + int mb_nr = p_Vid->current_mb_nr; + + Macroblock *currMB = &p_Vid->mb_data[mb_nr]; // intialization code deleted, see below, StW + Slice *currSlice = currMB->p_Slice; + + h264_imgpel_macroblock_t tmp_block; + + /* Update coordinates of the current concealed macroblock */ + + p_Vid->mb_x = x/BLOCK_SIZE; + p_Vid->mb_y = y/BLOCK_SIZE; + p_Vid->block_y = p_Vid->mb_y * BLOCK_SIZE; + p_Vid->pix_c_y = p_Vid->mb_y * p_Vid->mb_cr_size_y/4; + p_Vid->block_x = p_Vid->mb_x * BLOCK_SIZE; + p_Vid->pix_c_x = p_Vid->mb_x * p_Vid->mb_cr_size_x/4; + + mv_mul=4; + + // luma ******************************************************* + + vec1_x = x*mv_mul + mv[0]; + vec1_y = y*mv_mul + mv[1]; + get_block_luma(currMB, PLANE_Y, p_Vid->listX[list][ref_frame], x,y, mv, BLOCK_SIZE, BLOCK_SIZE, tmp_block); + + for(jj=0;jj<MB_BLOCK_SIZE/BLOCK_SIZE;jj++) + for(ii=0;ii<BLOCK_SIZE;ii++) + currSlice->mb_pred[LumaComp][jj][ii]=tmp_block[jj][ii]; + + + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + pMB[j*4+i] = currSlice->mb_pred[LumaComp][j][i]; + } + } + pMB += 16; + + if (dec_picture->chroma_format_idc != YUV400) + { + // chroma ******************************************************* + f1_x = 64/(p_Vid->mb_cr_size_x); + f2_x=f1_x-1; + + f1_y = 64/(p_Vid->mb_cr_size_y); + f2_y=f1_y-1; + + f3=f1_x*f1_y; + f4=f3>>1; + + for(uv=0;uv<2;uv++) + { + joff = subblk_offset_y[yuv][0][0]; + j4=p_Vid->pix_c_y+joff; + ioff = 
subblk_offset_x[yuv][0][0]; + i4=p_Vid->pix_c_x+ioff; + + for(jj=0;jj<2;jj++) + { + jf=(j4+jj)/(p_Vid->mb_cr_size_y/4); // jf = Subblock_y-coordinate + for(ii=0;ii<2;ii++) + { + ifx=(i4+ii)/(p_Vid->mb_cr_size_x/4); // ifx = Subblock_x-coordinate + + i1=(i4+ii)*f1_x + mv[0]; + j1=(j4+jj)*f1_y + mv[1]; + + ii0=iClip3 (0, dec_picture->size_x_cr-1, i1/f1_x); + jj0=iClip3 (0, dec_picture->size_y_cr-1, j1/f1_y); + ii1=iClip3 (0, dec_picture->size_x_cr-1, ((i1+f2_x)/f1_x)); + jj1=iClip3 (0, dec_picture->size_y_cr-1, ((j1+f2_y)/f1_y)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + currSlice->mb_pred[uv + 1][jj][ii]=(if0*jf0*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj0][ii0]+ + if1*jf0*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj0][ii1]+ + if0*jf1*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj1][ii0]+ + if1*jf1*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj1][ii1]+f4)/f3; + } + } + + for (j = 0; j < 2; j++) + { + for (i = 0; i < 2; i++) + { + pMB[j*2+i] = currSlice->mb_pred[uv + 1][j][i]; + } + } + pMB += 4; + + } + } +} + +/*! +************************************************************************ +* \brief +* compares two stored pictures by picture number for qsort in descending order +* +************************************************************************ +*/ +static inline int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 ) +{ + int pic_num1 = (*(StorablePicture**)arg1)->pic_num; + int pic_num2 = (*(StorablePicture**)arg2)->pic_num; + + if (pic_num1 < pic_num2) + return 1; + if (pic_num1 > pic_num2) + return -1; + else + return 0; +} + +/*! 
+************************************************************************ +* \brief +* compares two stored pictures by picture number for qsort in descending order +* +************************************************************************ +*/ +static inline int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 ) +{ + int long_term_pic_num1 = (*(StorablePicture**)arg1)->long_term_pic_num; + int long_term_pic_num2 = (*(StorablePicture**)arg2)->long_term_pic_num; + if ( long_term_pic_num1 < long_term_pic_num2) + return -1; + + if ( long_term_pic_num1 > long_term_pic_num2) + return 1; + else + return 0; +} + +/*! +************************************************************************ +* \brief +* compares two stored pictures by poc for qsort in ascending order +* +************************************************************************ +*/ +static inline int compare_pic_by_poc_asc( const void *arg1, const void *arg2 ) +{ + int poc1 = (*(StorablePicture**)arg1)->poc; + int poc2 = (*(StorablePicture**)arg2)->poc; + + if ( poc1 < poc2) + return -1; + if ( poc1 > poc2) + return 1; + else + return 0; +} + + +/*! +************************************************************************ +* \brief +* compares two stored pictures by poc for qsort in descending order +* +************************************************************************ +*/ +static inline int compare_pic_by_poc_desc( const void *arg1, const void *arg2 ) +{ + int poc1 = (*(StorablePicture**)arg1)->poc; + int poc2 = (*(StorablePicture**)arg2)->poc; + + if (poc1 < poc2) + return 1; + if (poc1 > poc2) + return -1; + else + return 0; +} + +/*! 
+************************************************************************ +* \brief +* Copy image data from one array to another array +************************************************************************ +*/ + +static void CopyImgData(imgpel **inputY, imgpel ***inputUV, imgpel **outputY, imgpel ***outputUV, + int img_width, int img_height, int img_width_cr, int img_height_cr) +{ + int x, y; + + for (y=0; y<img_height; y++) + for (x=0; x<img_width; x++) + outputY[y][x] = inputY[y][x]; + + for (y=0; y<img_height_cr; y++) + for (x=0; x<img_width_cr; x++) + { + outputUV[0][y][x] = inputUV[0][y][x]; + outputUV[1][y][x] = inputUV[1][y][x]; + } +} + +/*! +************************************************************************ +* \brief +* Copies the last reference frame for concealing reference frame loss. +************************************************************************ +*/ + +static StorablePicture* get_last_ref_pic_from_dpb(DecodedPictureBuffer *p_Dpb) +{ + int used_size = p_Dpb->used_size - 1; + int i; + + for(i = used_size; i >= 0; i--) + { + if (p_Dpb->fs[i]->is_used==3) + { + if (((p_Dpb->fs[i]->frame->used_for_reference) && + (!p_Dpb->fs[i]->frame->is_long_term)) /*|| ((p_Dpb->fs[i]->frame->used_for_reference==0) + && (p_Dpb->fs[i]->frame->slice_type == P_SLICE))*/ ) + { + return p_Dpb->fs[i]->frame; + } + } + } + + return NULL; +} + +/*! +************************************************************************ +* \brief +* Conceals the lost reference or non reference frame by either frame copy +* or motion vector copy concealment. 
+* +************************************************************************ +*/ + +static void copy_to_conceal(StorablePicture *src, StorablePicture *dst, VideoParameters *p_Vid) +{ + int i=0; + int ii=0, jj=0; + int scale = 1; + StorablePicture *dec_picture = p_Vid->dec_picture; + // InputParameters *test; + + p_Vid->current_mb_nr = 0; + + dst->PicSizeInMbs = src->PicSizeInMbs; + + dst->slice_type = src->slice_type = p_Vid->conceal_slice_type; + + dst->idr_flag = FALSE; //since we do not want to clears the ref list + + dst->no_output_of_prior_pics_flag = src->no_output_of_prior_pics_flag; + dst->long_term_reference_flag = src->long_term_reference_flag; + dst->adaptive_ref_pic_buffering_flag = src->adaptive_ref_pic_buffering_flag = 0; + dst->chroma_format_idc = src->chroma_format_idc; + dst->frame_mbs_only_flag = src->frame_mbs_only_flag; + dst->frame_cropping_flag = src->frame_cropping_flag; + dst->frame_cropping_rect_left_offset = src->frame_cropping_rect_left_offset; + dst->frame_cropping_rect_right_offset = src->frame_cropping_rect_right_offset; + dst->frame_cropping_rect_bottom_offset = src->frame_cropping_rect_bottom_offset; + dst->frame_cropping_rect_top_offset = src->frame_cropping_rect_top_offset; + dst->qp = src->qp; + dst->slice_qp_delta = src->slice_qp_delta; + + dec_picture = src; +} + +/*! 
+************************************************************************ +* \brief +* Uses the previous reference pic for concealment of reference frames +* +************************************************************************ +*/ + +static void +copy_prev_pic_to_concealed_pic(StorablePicture *picture, VideoParameters *p_Vid) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + + StorablePicture *ref_pic; + /* get the last ref pic in dpb */ + ref_pic = get_last_ref_pic_from_dpb(p_Dpb); + + assert(ref_pic != NULL); + + /* copy all the struc from this to current concealment pic */ + p_Vid->conceal_slice_type = P_SLICE; + copy_to_conceal(ref_pic, picture, p_Vid); +} + + +/*! +************************************************************************ +* \brief +* This function conceals a missing reference frame. The routine is called +* based on the difference in frame number. It conceals an IDR frame loss +* based on the sudden decrease in frame number. +* +************************************************************************ +*/ +// TODO: benski> pass timecode +void conceal_lost_frames(VideoParameters *p_Vid) +{ + int CurrFrameNum; + int UnusedShortTermFrameNum; + StorablePicture *picture = NULL; + int tmp1 = p_Vid->delta_pic_order_cnt[0]; + int tmp2 = p_Vid->delta_pic_order_cnt[1]; + int i; + + p_Vid->delta_pic_order_cnt[0] = p_Vid->delta_pic_order_cnt[1] = 0; + + // printf("A gap in frame number is found, try to fill it.\n"); + + if(p_Vid->IDR_concealment_flag == 1) + { + // Conceals an IDR frame loss. Uses the reference frame in the previous + // GOP for concealment. 
+ UnusedShortTermFrameNum = 0; + p_Vid->last_ref_pic_poc = -p_Vid->poc_gap; + p_Vid->earlier_missing_poc = 0; + } + else + UnusedShortTermFrameNum = (p_Vid->pre_frame_num + 1) % p_Vid->MaxFrameNum; + + CurrFrameNum = p_Vid->frame_num; + + while (CurrFrameNum != UnusedShortTermFrameNum) + { + picture = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr); + + picture->coded_frame = 1; + picture->pic_num = UnusedShortTermFrameNum; + picture->frame_num = UnusedShortTermFrameNum; + picture->non_existing = 0; + picture->is_output = 0; + picture->used_for_reference = 1; + picture->concealed_pic = 1; + + picture->adaptive_ref_pic_buffering_flag = 0; + + p_Vid->frame_num = UnusedShortTermFrameNum; + + picture->top_poc=p_Vid->last_ref_pic_poc + p_Vid->ref_poc_gap; + picture->bottom_poc=picture->top_poc; + picture->frame_poc=picture->top_poc; + picture->poc=picture->top_poc; + p_Vid->last_ref_pic_poc = picture->poc; + + copy_prev_pic_to_concealed_pic(picture, p_Vid); + + //if (UnusedShortTermFrameNum == 0) + if(p_Vid->IDR_concealment_flag == 1) + { + picture->slice_type = I_SLICE; + picture->idr_flag = TRUE; + flush_dpb(p_Vid); + picture->top_poc= 0; + picture->bottom_poc=picture->top_poc; + picture->frame_poc=picture->top_poc; + picture->poc=picture->top_poc; + p_Vid->last_ref_pic_poc = picture->poc; + } + + store_picture_in_dpb(p_Vid, picture); + + picture=NULL; + + p_Vid->pre_frame_num = UnusedShortTermFrameNum; + UnusedShortTermFrameNum = (UnusedShortTermFrameNum + 1) % p_Vid->MaxFrameNum; + + // update reference flags and set current flag. + for(i=16;i>0;i--) + { + p_Vid->ref_flag[i] = p_Vid->ref_flag[i-1]; + } + p_Vid->ref_flag[0] = 0; + } + p_Vid->delta_pic_order_cnt[0] = tmp1; + p_Vid->delta_pic_order_cnt[1] = tmp2; + p_Vid->frame_num = CurrFrameNum; +} + +/*! 
+************************************************************************ +* \brief +* Updates the reference list for motion vector copy concealment for non- +* reference frame loss. +* +************************************************************************ +*/ + +void update_ref_list_for_concealment(DecodedPictureBuffer *p_Dpb) +{ + VideoParameters *p_Vid = p_Dpb->p_Vid; + + unsigned i, j; + for (i=0, j=0; i<p_Dpb->used_size; i++) + { + if (p_Dpb->fs[i]->concealment_reference) + { + p_Dpb->fs_ref[j++] = p_Dpb->fs[i]; + } + } + + p_Dpb->ref_frames_in_buffer = p_Vid->active_pps->num_ref_idx_l0_active_minus1; +} + +/*! +************************************************************************ +* \brief +* Initialize the list based on the B frame or non reference 'p' frame +* to be concealed. The function initialize p_Vid->listX[0] and list 1 depending +* on current picture type +* +************************************************************************ +*/ +void init_lists_for_non_reference_loss(VideoParameters *p_Vid, int currSliceType, PictureStructure currPicStructure) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + unsigned i; + int j; + int MaxFrameNum = 1 << (active_sps->log2_max_frame_num_minus4 + 4); + int diff; + + int list0idx = 0; + int list0idx_1 = 0; + + StorablePicture *tmp_s; + + if (currPicStructure == FRAME) + { + for(i=0;i<p_Dpb->ref_frames_in_buffer; i++) + { + if(p_Dpb->fs[i]->concealment_reference == 1) + { + if(p_Dpb->fs[i]->frame_num > p_Vid->frame_to_conceal) + p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs[i]->frame_num - MaxFrameNum; + else + p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs[i]->frame_num; + p_Dpb->fs_ref[i]->frame->pic_num = p_Dpb->fs_ref[i]->frame_num_wrap; + } + } + } + + if (currSliceType == P_SLICE) + { + // Calculate FrameNumWrap and PicNum + if (currPicStructure == FRAME) + { + for(i=0;i<p_Dpb->used_size; i++) + { + if(p_Dpb->fs[i]->concealment_reference == 
1) + { + p_Vid->listX[0][list0idx++] = p_Dpb->fs[i]->frame; + } + } + // order list 0 by PicNum + qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc); + p_Vid->listXsize[0] = list0idx; + } + } + + if (currSliceType == B_SLICE) + { + if (currPicStructure == FRAME) + { + // for(i=0;i<p_Dpb->ref_frames_in_buffer; i++) + for(i=0;i<p_Dpb->used_size; i++) + { + if(p_Dpb->fs[i]->concealment_reference == 1) + { + if(p_Vid->earlier_missing_poc > p_Dpb->fs[i]->frame->poc) + p_Vid->listX[0][list0idx++] = p_Dpb->fs[i]->frame; + } + } + + qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_poc_desc); + list0idx_1 = list0idx; + + // for(i=0;i<p_Dpb->ref_frames_in_buffer; i++) + for(i=0;i<p_Dpb->used_size; i++) + { + if(p_Dpb->fs[i]->concealment_reference == 1) + { + if(p_Vid->earlier_missing_poc < p_Dpb->fs[i]->frame->poc) + p_Vid->listX[0][list0idx++] = p_Dpb->fs[i]->frame; + } + } + + qsort((void *)&p_Vid->listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc); + + for (j=0; j<list0idx_1; j++) + { + p_Vid->listX[1][list0idx-list0idx_1+j]=p_Vid->listX[0][j]; + } + for (j=list0idx_1; j<list0idx; j++) + { + p_Vid->listX[1][j-list0idx_1]=p_Vid->listX[0][j]; + } + + p_Vid->listXsize[0] = p_Vid->listXsize[1] = list0idx; + + qsort((void *)&p_Vid->listX[0][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + qsort((void *)&p_Vid->listX[1][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + p_Vid->listXsize[0] = p_Vid->listXsize[1] = list0idx; + } + } + + if ((p_Vid->listXsize[0] == p_Vid->listXsize[1]) && (p_Vid->listXsize[0] > 1)) + { + // check if lists are identical, if yes swap first two elements of listX[1] + diff=0; + for (j = 0; j< p_Vid->listXsize[0]; j++) + { + if (p_Vid->listX[0][j]!=p_Vid->listX[1][j]) + diff=1; + } + if (!diff) + 
{ + tmp_s = p_Vid->listX[1][0]; + p_Vid->listX[1][0]=p_Vid->listX[1][1]; + p_Vid->listX[1][1]=tmp_s; + } + } + + // set max size + p_Vid->listXsize[0] = imin (p_Vid->listXsize[0], (int)active_sps->num_ref_frames); + p_Vid->listXsize[1] = imin (p_Vid->listXsize[1], (int)active_sps->num_ref_frames); + + p_Vid->listXsize[1] = 0; + // set the unused list entries to NULL + for (i=p_Vid->listXsize[0]; i< (MAX_LIST_SIZE) ; i++) + { + p_Vid->listX[0][i] = NULL; + } + for (i=p_Vid->listXsize[1]; i< (MAX_LIST_SIZE) ; i++) + { + p_Vid->listX[1][i] = NULL; + } +} + + +/*! +************************************************************************ +* \brief +* Get from the dpb the picture corresponding to a POC. The POC varies +* depending on whether it is a frame copy or motion vector copy concealment. +* The frame corresponding to the POC is returned. +* +************************************************************************ +*/ + +StorablePicture *get_pic_from_dpb(VideoParameters *p_Vid, int missingpoc, unsigned int *pos) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + int used_size = p_Dpb->used_size - 1; + int i, concealfrom = 0; + + for(i = used_size; i >= 0; i--) + { + if(p_Dpb->fs[i]->poc == concealfrom) + { + *pos = i; + return p_Dpb->fs[i]->frame; + } + } + + return NULL; +} + +/*! +************************************************************************ +* \brief +* Function to sort the POC and find the lowest number in the POC list +* Compare the integers +* +************************************************************************ +*/ + +int comp(const void *i, const void *j) +{ + return *(int *)i - *(int *)j; +} + +/*! +************************************************************************ +* \brief +* Initialises a node, allocates memory for the node, and returns +* a pointer to the new node. 
+* +************************************************************************ +*/ + +struct concealment_node * init_node( StorablePicture* picture, int missingpoc ) +{ + struct concealment_node *ptr; + + ptr = (struct concealment_node *) calloc( 1, sizeof(struct concealment_node ) ); + + if( ptr == NULL ) + return (struct concealment_node *) NULL; + else { + ptr->picture = picture; + ptr->missingpocs = missingpoc; + ptr->next = NULL; + return ptr; + } +} + +/*! +************************************************************************ +* \brief +* Prints the details of a node +* +************************************************************************ +*/ + +void print_node( struct concealment_node *ptr ) +{ + printf("Missing POC=%d\n", ptr->missingpocs ); +} + + +/*! +************************************************************************ +* \brief +* Prints all nodes from the current address passed to it. +* +************************************************************************ +*/ + +void print_list( struct concealment_node *ptr ) +{ + while( ptr != NULL ) + { + print_node( ptr ); + ptr = ptr->next; + } +} + +/*! +************************************************************************ +* \brief +* Adds a node to the end of the list. +* +************************************************************************ +*/ + + +static void add_node( VideoParameters *p_Vid, struct concealment_node *concealment_new ) +{ + if( p_Vid->concealment_head == NULL ) + { + p_Vid->concealment_end = p_Vid->concealment_head = concealment_new; + return; + } + p_Vid->concealment_end->next = concealment_new; + p_Vid->concealment_end = concealment_new; +} + + +/*! 
+************************************************************************ +* \brief +* Deletes the specified node pointed to by 'ptr' from the list +* +************************************************************************ +*/ + + +static void delete_node( VideoParameters *p_Vid, struct concealment_node *ptr ) +{ + // We only need to delete the first node in the linked list + if( ptr == p_Vid->concealment_head ) + { + p_Vid->concealment_head = p_Vid->concealment_head->next; + if( p_Vid->concealment_end == ptr ) + p_Vid->concealment_end = p_Vid->concealment_end->next; + free(ptr); + } +} + +/*! +************************************************************************ +* \brief +* Deletes all nodes from the place specified by ptr +* +************************************************************************ +*/ + +void delete_list( VideoParameters *p_Vid, struct concealment_node *ptr ) +{ + struct concealment_node *temp; + + if( p_Vid->concealment_head == NULL ) return; + + if( ptr == p_Vid->concealment_head ) + { + p_Vid->concealment_head = NULL; + p_Vid->concealment_end = NULL; + } + else + { + temp = p_Vid->concealment_head; + + while( temp->next != ptr ) + temp = temp->next; + p_Vid->concealment_end = temp; + } + + while( ptr != NULL ) + { + temp = ptr->next; + free( ptr ); + ptr = temp; + } +} + +/*! +************************************************************************ +* \brief +* Stores the missing non reference frames in the concealment buffer. The +* detection is based on the POC difference in the sorted POC array. A missing +* non reference frame is detected when the dpb is full. A singly linked list +* is maintained for storing the missing non reference frames. 
+* +************************************************************************ +*/ +// TODO: benski> pass timecode +void conceal_non_ref_pics(VideoParameters *p_Vid, int diff) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + int missingpoc = 0; + unsigned int i, pos = 0; + StorablePicture *conceal_from_picture = NULL; + StorablePicture *conceal_to_picture = NULL; + struct concealment_node *concealment_ptr = NULL; + int temp_used_size = p_Dpb->used_size; + + if(p_Dpb->used_size == 0 ) + return; + + qsort(p_Vid->pocs_in_dpb, p_Dpb->size, sizeof(int), comp); + + for(i=0;i<p_Dpb->size-diff;i++) + { + p_Dpb->used_size = p_Dpb->size; + if((p_Vid->pocs_in_dpb[i+1] - p_Vid->pocs_in_dpb[i]) > p_Vid->poc_gap) + { + conceal_to_picture = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr); + + missingpoc = p_Vid->pocs_in_dpb[i] + p_Vid->poc_gap; + // Diagnostics + // printf("\n missingpoc = %d\n",missingpoc); + + if(missingpoc > p_Vid->earlier_missing_poc) + { + p_Vid->earlier_missing_poc = missingpoc; + conceal_to_picture->top_poc= missingpoc; + conceal_to_picture->bottom_poc=missingpoc; + conceal_to_picture->frame_poc=missingpoc; + conceal_to_picture->poc=missingpoc; + conceal_from_picture = get_pic_from_dpb(p_Vid, missingpoc, &pos); + + assert(conceal_from_picture != NULL); + + p_Dpb->used_size = pos+1; + + p_Vid->frame_to_conceal = conceal_from_picture->frame_num + 1; + + update_ref_list_for_concealment(p_Dpb); + p_Vid->conceal_slice_type = B_SLICE; + copy_to_conceal(conceal_from_picture, conceal_to_picture, p_Vid); + concealment_ptr = init_node( conceal_to_picture, missingpoc ); + add_node(p_Vid, concealment_ptr); + // Diagnostics + // print_node(concealment_ptr); + } + } + } + + //restore the original value + //p_Dpb->used_size = p_Dpb->size; + p_Dpb->used_size = temp_used_size; +} + +/*! 
+************************************************************************ +* \brief +* Perform Sliding window decoded reference picture marking process. It +* maintains the POC s stored in the dpb at a specific instance. +* +************************************************************************ +*/ + +void sliding_window_poc_management(DecodedPictureBuffer *p_Dpb, StorablePicture *p) +{ + if (p_Dpb->used_size == p_Dpb->size) + { + VideoParameters *p_Vid = p_Dpb->p_Vid; + unsigned int i; + + for(i=0;i<p_Dpb->size-1; i++) + p_Vid->pocs_in_dpb[i] = p_Vid->pocs_in_dpb[i+1]; + } + + // p_Vid->pocs_in_dpb[p_Dpb->used_size-1] = p->poc; +} + + +/*! +************************************************************************ +* \brief +* Outputs the non reference frames. The POCs in the concealment buffer are +* sorted in ascending order and outputted when the lowest POC in the +* concealment buffer is lower than the lowest in the p_Dpb-> The linked list +* entry corresponding to the outputted POC is immediately deleted. +* +************************************************************************ +*/ + +void write_lost_non_ref_pic(VideoParameters *p_Vid, int poc) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + FrameStore concealment_fs; + if(poc > 0) + { + if((poc - p_Dpb->last_output_poc) > p_Vid->poc_gap) + { + + concealment_fs.frame = p_Vid->concealment_head->picture; + concealment_fs.is_output = 0; + concealment_fs.is_reference = 0; + concealment_fs.is_used = 3; + + write_stored_frame(p_Vid, &concealment_fs); + delete_node(p_Vid, p_Vid->concealment_head); + } + } +} + +/*! +************************************************************************ +* \brief +* Conceals frame loss immediately after the IDR. This special case produces +* the same result for either frame copy or motion vector copy concealment. 
+* +************************************************************************ +*/ +// TODO: benski> pass timecode +void write_lost_ref_after_idr(VideoParameters *p_Vid, int pos) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + int temp = 1; + + if(p_Vid->last_out_fs->frame == NULL) + { + p_Vid->last_out_fs->frame = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, + p_Vid->width_cr, p_Vid->height_cr); + p_Vid->last_out_fs->is_used = 3; + } + + copy_to_conceal(p_Dpb->fs[pos]->frame, p_Vid->last_out_fs->frame, p_Vid); +} + diff --git a/Src/h264dec/ldecod/src/errorconcealment.c b/Src/h264dec/ldecod/src/errorconcealment.c new file mode 100644 index 00000000..6b1b47bf --- /dev/null +++ b/Src/h264dec/ldecod/src/errorconcealment.c @@ -0,0 +1,138 @@ + +/*! + *********************************************************************** + * \file errorconcealment.c + * + * \brief + * Implements error concealment scheme for H.264 decoder + * + * \date + * 6.10.2000 + * + * \version + * 1.0 + * + * \note + * This simple error concealment implemented in this decoder uses + * the existing dependencies of syntax elements. + * In case that an element is detected as false this elements and all + * dependend elements are marked as elements to conceal in the p_Vid->ec_flag[] + * array. If the decoder requests a new element by the function + * readSyntaxElement_xxxx() this array is checked first if an error concealment has + * to be applied on this element. + * In case that an error occured a concealed element is given to the + * decoding function in macroblock(). + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de> + *********************************************************************** + */ + +#include "contributors.h" +#include "global.h" +#include "elements.h" + + + +/*! 
+ *********************************************************************** + * \brief + * set concealment for all elements in same partition + * and dependend syntax elements + * \param p_Vid + * image encoding parameters for current picture + * \param se + * type of syntax element to conceal + * \return + * EC_REQ, elements of same type or depending type need error concealment. \n + * EX_SYNC sync on next header + *********************************************************************** + */ +int set_ec_flag(VideoParameters *p_Vid, int se) +{ + + /* + if (p_Vid->ec_flag[se] == NO_EC) + printf("Error concealment on element %s\n",SEtypes[se]); + */ + switch (se) + { + case SE_HEADER : + p_Vid->ec_flag[SE_HEADER] = EC_REQ; + case SE_PTYPE : + p_Vid->ec_flag[SE_PTYPE] = EC_REQ; + case SE_MBTYPE : + p_Vid->ec_flag[SE_MBTYPE] = EC_REQ; + + case SE_REFFRAME : + p_Vid->ec_flag[SE_REFFRAME] = EC_REQ; + p_Vid->ec_flag[SE_MVD] = EC_REQ; // set all motion vectors to zero length + se = SE_CBP_INTER; // conceal also Inter texture elements + break; + + case SE_INTRAPREDMODE : + p_Vid->ec_flag[SE_INTRAPREDMODE] = EC_REQ; + se = SE_CBP_INTRA; // conceal also Intra texture elements + break; + case SE_MVD : + p_Vid->ec_flag[SE_MVD] = EC_REQ; + se = SE_CBP_INTER; // conceal also Inter texture elements + break; + + default: + break; + } + + switch (se) + { + case SE_CBP_INTRA : + p_Vid->ec_flag[SE_CBP_INTRA] = EC_REQ; + case SE_LUM_DC_INTRA : + p_Vid->ec_flag[SE_LUM_DC_INTRA] = EC_REQ; + case SE_CHR_DC_INTRA : + p_Vid->ec_flag[SE_CHR_DC_INTRA] = EC_REQ; + case SE_LUM_AC_INTRA : + p_Vid->ec_flag[SE_LUM_AC_INTRA] = EC_REQ; + case SE_CHR_AC_INTRA : + p_Vid->ec_flag[SE_CHR_AC_INTRA] = EC_REQ; + break; + + case SE_CBP_INTER : + p_Vid->ec_flag[SE_CBP_INTER] = EC_REQ; + case SE_LUM_DC_INTER : + p_Vid->ec_flag[SE_LUM_DC_INTER] = EC_REQ; + case SE_CHR_DC_INTER : + p_Vid->ec_flag[SE_CHR_DC_INTER] = EC_REQ; + case SE_LUM_AC_INTER : + p_Vid->ec_flag[SE_LUM_AC_INTER] = EC_REQ; + case SE_CHR_AC_INTER 
: + p_Vid->ec_flag[SE_CHR_AC_INTER] = EC_REQ; + break; + case SE_DELTA_QUANT_INTER : + p_Vid->ec_flag[SE_DELTA_QUANT_INTER] = EC_REQ; + break; + case SE_DELTA_QUANT_INTRA : + p_Vid->ec_flag[SE_DELTA_QUANT_INTRA] = EC_REQ; + break; + default: + break; + + } + return EC_REQ; +} + +/*! + *********************************************************************** + * \brief + * resets EC_Flags called at the start of each slice + * + *********************************************************************** + */ +void reset_ec_flags(VideoParameters *p_Vid) +{ + int i; + for (i=0; i<SE_MAX_ELEMENTS; i++) + p_Vid->ec_flag[i] = NO_EC; +} + diff --git a/Src/h264dec/ldecod/src/filter_chroma_horiz.c b/Src/h264dec/ldecod/src/filter_chroma_horiz.c new file mode 100644 index 00000000..dbed3e15 --- /dev/null +++ b/Src/h264dec/ldecod/src/filter_chroma_horiz.c @@ -0,0 +1,533 @@ +#include "global.h" +#include "image.h" +#include "mb_access.h" +#include "loopfilter.h" +#include <mmintrin.h> +#include <emmintrin.h> + +static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; +static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; +static const byte CLIP_TAB[52][5] = +{ + { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0}, + { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0}, + { -1, 0, 0, 0, 0},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 1, 1, 1, 1}, + { -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 
2, 3, 3}, + { -1, 1, 2, 3, 3},{ -1, 2, 2, 3, 3},{ -1, 2, 2, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 3, 3, 5, 5},{ -1, 3, 4, 6, 6},{ -1, 3, 4, 6, 6}, + { -1, 4, 5, 7, 7},{ -1, 4, 5, 8, 8},{ -1, 4, 6, 9, 9},{ -1, 5, 7,10,10},{ -1, 6, 8,11,11},{ -1, 6, 8,13,13},{ -1, 7,10,14,14},{ -1, 8,11,16,16}, + { -1, 9,12,18,18},{ -1,10,13,20,20},{ -1,11,15,23,23},{ -1,13,17,25,25} +}; + +static const int pelnum_cr[2][4] = {{0,8,16,16}, {0,8, 8,16}}; //[dir:0=vert, 1=hor.][yuv_format] + +#define LOAD_LINE_EPI16(reg, ptr) { reg = _mm_loadl_epi64((__m128i *)(ptr)); reg = _mm_unpacklo_epi8(reg, xmm_zero); } +static void FilterChroma8_Horiz_sse2(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, const byte Strength[16], const byte *ClipTab, int Alpha, int Beta, int bitdepth_scale, int max_imgpel_value) +{ + __m128i xmm_L1, xmm_L0, xmm_R0, xmm_R1; + __m128i xmm_strength; + __m128i xmm_absdiff, xmm_diff, xmm_acc; + __m128i xmm_127, xmm_zero; + __m128i xmm_alpha, xmm_beta; + + int match; + xmm_zero = _mm_setzero_si128(); + xmm_strength = _mm_load_si128((__m128i *)Strength); + xmm_127 = _mm_set1_epi8(127); + xmm_strength = _mm_adds_epu8(xmm_strength, xmm_127); + xmm_strength = _mm_srai_epi16(xmm_strength, 15); // shift so it's all 0xFFFF or 0x0000 + + LOAD_LINE_EPI16(xmm_R0, SrcPtrQ); + LOAD_LINE_EPI16(xmm_L0, SrcPtrP); + + xmm_alpha = _mm_set1_epi16((uint16_t)Alpha); + + // if ( abs( R0 - L0 ) < Alpha ) + xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0); + xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_R0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha); + xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + LOAD_LINE_EPI16(xmm_R1, SrcPtrQ+inc_dim); + + xmm_beta = _mm_set1_epi16((uint16_t)Beta); + + // if ( abs(R0 - R1) < Beta ) + xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R1); + xmm_absdiff =_mm_subs_epu16(xmm_R1, xmm_R0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, 
xmm_diff); + xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + LOAD_LINE_EPI16(xmm_L1, SrcPtrP-inc_dim); + + // if ( abs(L0 - L1) < Beta ) + xmm_diff=_mm_subs_epu16(xmm_L0, xmm_L1); + xmm_absdiff =_mm_subs_epu16(xmm_L1, xmm_L0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + if (Strength[0] == 4) // if strong filter is in use, ALL strengths will be 4 + { + // *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + __m128i xmm_2 = _mm_set1_epi16(2); + + xmm_acc = xmm_L1; + xmm_acc = _mm_slli_epi16(xmm_acc, 1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_L0); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_R1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_2); + xmm_acc = _mm_srai_epi16(xmm_acc, 2); + xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); + xmm_L0 = _mm_andnot_si128(xmm_strength, xmm_L0); + xmm_L0 = _mm_or_si128(xmm_L0, xmm_acc); + xmm_L0 = _mm_packus_epi16(xmm_L0, xmm_L0); + _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_L0); + + // *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + xmm_acc = xmm_R1; + xmm_acc = _mm_slli_epi16(xmm_acc, 1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_R0); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_L1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_2); + xmm_acc = _mm_srai_epi16(xmm_acc, 2); + xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); + xmm_R0 = _mm_andnot_si128(xmm_strength, xmm_R0); + xmm_R0 = _mm_or_si128(xmm_R0, xmm_acc); + xmm_R0 = _mm_packus_epi16(xmm_R0, xmm_R0); + _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_R0); + } + else + { + int C0 = ClipTab[ Strength[0] ] * bitdepth_scale + 1; + int C1 = ClipTab[ Strength[4] ] * bitdepth_scale + 1; + int C2 = ClipTab[ Strength[8] ] * bitdepth_scale + 1; + int C3 = ClipTab[ 
Strength[12] ] * bitdepth_scale + 1; + __m128i xmm_tc0 = _mm_setr_epi16(C0, C0, C1, C1, C2, C2, C3, C3); // TODO: benski> probably a better way to do this. + __m128i xmm_negative_tc0 = _mm_sub_epi16(xmm_zero, xmm_tc0); + __m128i xmm_4 = _mm_set1_epi16(4); + //int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + xmm_acc = xmm_R0; + xmm_acc = _mm_sub_epi16(xmm_acc, xmm_L0); + xmm_acc = _mm_slli_epi16(xmm_acc, 2); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_L1); + xmm_acc = _mm_sub_epi16(xmm_acc, xmm_R1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_4); + xmm_acc = _mm_srai_epi16(xmm_acc, 3); + xmm_acc = _mm_min_epi16(xmm_acc, xmm_tc0); + xmm_acc = _mm_max_epi16(xmm_acc, xmm_negative_tc0); + xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); + + // *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif) ; + xmm_L0 = _mm_add_epi16(xmm_L0, xmm_acc); + xmm_L0 = _mm_packus_epi16(xmm_L0, xmm_L0); + _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_L0); + + // *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif) ; + xmm_R0 = _mm_sub_epi16(xmm_R0, xmm_acc); + xmm_R0 = _mm_packus_epi16(xmm_R0, xmm_R0); + _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_R0); + } + + +} + +static void IntraStrongFilter_Chroma8_Horiz_YUV420_sse2(int inc_dim, imgpel *SrcPtrP, int Alpha, int Beta) +{ + __m128i xmm_L1, xmm_L0, xmm_R0, xmm_R1; + __m128i xmm_strength; + __m128i xmm_absdiff, xmm_diff, xmm_acc; + __m128i xmm_zero; + __m128i xmm_alpha, xmm_beta; +__m128i xmm_2; + + int match; + xmm_zero = _mm_setzero_si128(); + + LOAD_LINE_EPI16(xmm_L0, SrcPtrP); + LOAD_LINE_EPI16(xmm_R0, SrcPtrP+inc_dim); + + xmm_alpha = _mm_set1_epi16((uint16_t)Alpha); + + // if ( abs( R0 - L0 ) < Alpha ) + xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0); + xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_R0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_strength = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + LOAD_LINE_EPI16(xmm_R1, SrcPtrP+2*inc_dim); 
+ + xmm_beta = _mm_set1_epi16((uint16_t)Beta); + + // if ( abs(R0 - R1) < Beta ) + xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R1); + xmm_absdiff =_mm_subs_epu16(xmm_R1, xmm_R0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + LOAD_LINE_EPI16(xmm_L1, SrcPtrP-inc_dim); + + // if ( abs(L0 - L1) < Beta ) + xmm_diff=_mm_subs_epu16(xmm_L0, xmm_L1); + xmm_absdiff =_mm_subs_epu16(xmm_L1, xmm_L0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + + // *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + xmm_2 = _mm_set1_epi16(2); + + xmm_acc = xmm_L1; + xmm_acc = _mm_slli_epi16(xmm_acc, 1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_L0); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_R1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_2); + xmm_acc = _mm_srai_epi16(xmm_acc, 2); + xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); + xmm_L0 = _mm_andnot_si128(xmm_strength, xmm_L0); + xmm_L0 = _mm_or_si128(xmm_L0, xmm_acc); + xmm_L0 = _mm_packus_epi16(xmm_L0, xmm_L0); + _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_L0); + + // *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + xmm_acc = xmm_R1; + xmm_acc = _mm_slli_epi16(xmm_acc, 1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_R0); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_L1); + xmm_acc = _mm_add_epi16(xmm_acc, xmm_2); + xmm_acc = _mm_srai_epi16(xmm_acc, 2); + xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); + xmm_R0 = _mm_andnot_si128(xmm_strength, xmm_R0); + xmm_R0 = _mm_or_si128(xmm_R0, xmm_acc); + xmm_R0 = _mm_packus_epi16(xmm_R0, xmm_R0); + _mm_storel_epi64((__m128i *)(SrcPtrP+inc_dim), xmm_R0); + + + +} + + +// separate function to make it easier to unit test +static 
void FilterChroma8_Horiz(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, const byte Strength[16], const byte *ClipTab, int Alpha, int Beta, int bitdepth_scale, int max_imgpel_value) +{ + int pel; + for( pel = 0 ; pel < 8 ; ++pel, SrcPtrP++, SrcPtrQ++ ) + { + int Strng = Strength[(((pel >> 1) << 2) + (pel & 0x01))]; + + if( Strng != 0) + { + imgpel L0 = *SrcPtrP; + imgpel R0 = *SrcPtrQ; + + if ( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = *(SrcPtrQ + inc_dim); + if ( abs(R0 - R1) < Beta ) + { + imgpel L1 = *(SrcPtrP - inc_dim); + if ( abs(L0 - L1) < Beta ) + { + if( Strng == 4 ) // INTRA strong filtering + { + *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + } + else + { + int tc0 = ClipTab[ Strng ] * bitdepth_scale + 1; + int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif) ; + *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif) ; + } + } + } + } + } + } +} + +void EdgeLoopChromaNormal_Horiz(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p) +{ + // dir == 1 + imgpel** Img = image->img; + VideoParameters *p_Vid = MbQ->p_Vid; + + int yQ = (edge < 16 ? 
edge - 1: 0); + PixelPos pixMB1; + + p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_CHROMA], &pixMB1); + + if (pixMB1.available || (MbQ->DFDisableIdc == 0)) + { + int bitdepth_scale = p_Vid->bitdepth_scale[IS_CHROMA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1]; + + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + PixelPos pixP = pixMB1; + Macroblock *MbP = &(p_Vid->mb_data[pixP.mb_addr]); + + // Average QP of the two blocks + int QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + int indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + int Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + int Beta = BETA_TABLE [indexB] * bitdepth_scale; + + if (Alpha !=0 && Beta != 0) + { + const int PelNum = pelnum_cr[1][p->chroma_format_idc]; + const byte *ClipTab = CLIP_TAB[indexA]; + int inc_dim = image->stride; + int pel; + PixelPos pixQ, pixMB2; + + p_Vid->getNeighbour0X(MbQ, ++yQ, p_Vid->mb_size[IS_CHROMA], &pixMB2); + pixQ = pixMB2; + + if (pelnum_cr[1][p->chroma_format_idc] == 8) + { + imgpel *SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + imgpel *SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + if (sse2_flag) + FilterChroma8_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, Strength, ClipTab, Alpha, Beta, bitdepth_scale, max_imgpel_value); + else + FilterChroma8_Horiz(inc_dim, SrcPtrP, SrcPtrQ, Strength, ClipTab, Alpha, Beta, bitdepth_scale, max_imgpel_value); + + } + else + { + for( pel = 0 ; pel < PelNum ; ++pel ) + { + int Strng = Strength[(PelNum == 8) ? 
(((pel >> 1) << 2) + (pel & 0x01)) : pel]; + + if( Strng != 0) + { + imgpel *SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + imgpel L0 = *SrcPtrP; + imgpel *SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + imgpel R0 = *SrcPtrQ; + + if ( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = *(SrcPtrQ + inc_dim); + if ( abs(R0 - R1) < Beta ) + { + imgpel L1 = *(SrcPtrP - inc_dim); + if ( abs(L0 - L1) < Beta ) + { + if( Strng == 4 ) // INTRA strong filtering + { + *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + } + else + { + int tc0 = ClipTab[ Strng ] * bitdepth_scale + 1; + int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif) ; + *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif) ; + } + } + } + } + } + pixP.pos_x++; + pixQ.pos_x++; + } + } + } + } +} + + +static void FilterChroma8_Horiz_sse(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const char *ClipTab) +{ + __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1); + __m64 mmx_zero = _mm_setzero_si64(), mmx_four=_mm_set1_pi16(4); + __m64 mmx_minus_one; + __m64 mmx_absdiff, mmx_diff; + __m64 mmx_L0, mmx_L1; + __m64 mmx_R0, mmx_R1; + __m64 mmx_C0, mmx_negative_C0, mmx_dif, mmx_match; + int match; + int i=0; + + mmx_minus_one = _mm_set1_pi32(-1); + + STAGE: + + while (!Strength[i*2] && !Strength[i*2+1]) + { + SrcPtrP += 4; + if (i++ == 1) // last stage + return; + } + + mmx_L0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP)); + mmx_R0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+p_step)); + + // abs( R0 - L0 ) < Alpha + // MMX doesn't have unsigned compare, so we have to go to short + mmx_L0 = _mm_unpacklo_pi8(mmx_L0, mmx_zero); + mmx_R0 = _mm_unpacklo_pi8(mmx_R0, mmx_zero); + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0); + mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_match = 
_mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do + mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + SrcPtrP += 4; + goto STAGE; // start the process over from next position + } + + // abs( R0 - R1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_R0 already populated + mmx_R1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+2*p_step)); + mmx_R1 = _mm_unpacklo_pi8(mmx_R1, mmx_zero); + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1); + mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + SrcPtrP += 4; + goto STAGE; // start the process over from next position + } + + // abs(L0 - L1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_L0 already populated + mmx_L1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP-p_step)); + mmx_L1 = _mm_unpacklo_pi8(mmx_L1, mmx_zero); + mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1); + mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; +SrcPtrP += 4; + goto STAGE; // start the process over from next position + } + + // ok, now time to performn the actual calculation. hope it was worth it!! 
+ + // tc0 = ClipTab[ Strng ] + 1 + mmx_C0 = _mm_setr_pi16(ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2+1]]+1, ClipTab[Strength[i*2+1]]+1); + mmx_negative_C0 = _mm_sub_pi16(mmx_zero, mmx_C0); + + // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + mmx_dif = mmx_R0; + mmx_dif = _mm_sub_pi16(mmx_dif, mmx_L0); + mmx_dif = _mm_slli_pi16(mmx_dif, 2); + mmx_dif = _mm_add_pi16(mmx_dif, mmx_L1); + mmx_dif = _mm_sub_pi16(mmx_dif, mmx_R1); + mmx_dif = _mm_add_pi16(mmx_dif, mmx_four); + mmx_dif = _mm_srai_pi16(mmx_dif, 3); + mmx_dif = _mm_min_pi16(mmx_dif, mmx_C0); + mmx_dif = _mm_max_pi16(mmx_dif, mmx_negative_C0); + mmx_dif = _mm_and_si64(mmx_dif, mmx_match); + + // L0 = (imgpel) iClip1(max_imgpel_value, L0 + dif); + mmx_L0 = _mm_add_pi16(mmx_L0, mmx_dif); + + // R0 = (imgpel) iClip1(max_imgpel_value, R0 - dif); + mmx_R0 = _mm_sub_pi16(mmx_R0, mmx_dif); + + // store + mmx_R0 = _mm_packs_pu16(mmx_R0, mmx_R0); + mmx_L0 = _mm_packs_pu16(mmx_L0, mmx_L0); + + *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_L0); + *(int *)(SrcPtrP+p_step) = _mm_cvtsi64_si32(mmx_R0); + + if (i++ == 1) + return; + + SrcPtrP += 4; + goto STAGE; // next stage +} + + +void EdgeLoopChroma_Horiz_YUV420(VideoImage *image, const byte strength[4], Macroblock *MbQ, int uv, PixelPos pixMB, Macroblock *MbP) +{ + // dir == 1 + imgpel** Img = image->img; + + if (pixMB.available || (MbQ->DFDisableIdc == 0)) + { + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + + // Average QP of the two blocks + int QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + int indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + int Alpha = ALPHA_TABLE[indexA] ; + int Beta = BETA_TABLE [indexB] ; + + if (Alpha !=0 && Beta != 0) + { + const int PelNum = 8; + + int inc_dim = image->stride; + imgpel *SrcPtrP; + + + SrcPtrP = &(Img[pixMB.pos_y>>1][pixMB.pos_x>>1]); + + if (strength[0] == 4) // if strong filter is used, 
all blocks will be strong + { + IntraStrongFilter_Chroma8_Horiz_YUV420_sse2(inc_dim, SrcPtrP, Alpha, Beta); + } + else + { + const byte *ClipTab = CLIP_TAB[indexA]; + FilterChroma8_Horiz_sse(inc_dim, SrcPtrP, Alpha, Beta, strength, ClipTab); + } + } + } +}
\ No newline at end of file diff --git a/Src/h264dec/ldecod/src/filter_chroma_vert.c b/Src/h264dec/ldecod/src/filter_chroma_vert.c new file mode 100644 index 00000000..8c4a4c8c --- /dev/null +++ b/Src/h264dec/ldecod/src/filter_chroma_vert.c @@ -0,0 +1,570 @@ +#include "global.h" +#include "image.h" +#include "mb_access.h" +#include "loopfilter.h" +#include <emmintrin.h> +static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; +static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; +static const byte CLIP_TAB[52][5] = +{ + { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0}, + { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0}, + { -1, 0, 0, 0, 0},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 1, 1, 1, 1}, + { -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 2, 3, 3}, + { -1, 1, 2, 3, 3},{ -1, 2, 2, 3, 3},{ -1, 2, 2, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 3, 3, 5, 5},{ -1, 3, 4, 6, 6},{ -1, 3, 4, 6, 6}, + { -1, 4, 5, 7, 7},{ -1, 4, 5, 8, 8},{ -1, 4, 6, 9, 9},{ -1, 5, 7,10,10},{ -1, 6, 8,11,11},{ -1, 6, 8,13,13},{ -1, 7,10,14,14},{ -1, 8,11,16,16}, + { -1, 9,12,18,18},{ -1,10,13,20,20},{ -1,11,15,23,23},{ -1,13,17,25,25} +} ; + +static const int pelnum_cr[2][4] = {{0,8,16,16}, {0,8, 8,16}}; //[dir:0=vert, 1=hor.][yuv_format] + +void EdgeLoopChromaNormal_Vert(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p) +{ + // dir == 0 
+ imgpel** Img = image->img; + VideoParameters *p_Vid = MbQ->p_Vid; + + int xQ = edge - 1; + int yQ = 0; + PixelPos pixMB1; + + p_Vid->getNeighbourX0(MbQ, xQ, p_Vid->mb_size[IS_CHROMA], &pixMB1); + + if (pixMB1.available || (MbQ->DFDisableIdc == 0)) + { + int bitdepth_scale = p_Vid->bitdepth_scale[IS_CHROMA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1]; + + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + PixelPos pixP = pixMB1; + Macroblock *MbP = &(p_Vid->mb_data[pixP.mb_addr]); + + // Average QP of the two blocks + int QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + int indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + int Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + int Beta = BETA_TABLE [indexB] * bitdepth_scale; + + if (Alpha !=0 && Beta != 0) + { + const int PelNum = pelnum_cr[0][p->chroma_format_idc]; + const byte *ClipTab = CLIP_TAB[indexA]; + int inc_dim = 1; + int pel; + PixelPos pixQ, pixMB2; + + p_Vid->getNeighbourX0(MbQ, edge, p_Vid->mb_size[IS_CHROMA], &pixMB2); + pixQ = pixMB2; + + for( pel = 0 ; pel < PelNum ; ++pel ) + { + int Strng = Strength[(PelNum == 8) ? 
(((pel >> 1) << 2) + (pel & 0x01)) : pel]; + + if( Strng != 0) + { + imgpel *SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + imgpel L0 = *SrcPtrP; + imgpel *SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + imgpel R0 = *SrcPtrQ; + + if ( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = *(SrcPtrQ + inc_dim); + if ( abs(R0 - R1) < Beta ) + { + imgpel L1 = *(SrcPtrP - inc_dim); + if ( abs(L0 - L1) < Beta ) + { + if( Strng == 4 ) // INTRA strong filtering + { + *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + } + else + { + int tc0 = ClipTab[ Strng ] * bitdepth_scale + 1; + int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif) ; + *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif) ; + } + } + } + } + } + pixP.pos_y++; + pixQ.pos_y++; + } + } + } +} + +static void FilterStrongChroma_Vert_sse(int p_step, imgpel *SrcPtrP, int Alpha, int Beta) +{ + __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1); + __m64 mmx_zero = _mm_setzero_si64(), mmx_two=_mm_set1_pi16(2); + __m64 mmx_minus_one; + __m64 mmx_absdiff, mmx_diff; + __m64 mmx_L0, mmx_L1, mmx_L1_L0; + __m64 mmx_R0, mmx_R0_R1, mmx_R1; + __m64 mmx_load0, mmx_load1, mmx_load2, mmx_load3, mmx_load4, mmx_load5, mmx_load6, mmx_load7, mmx_load8; + __m64 mmx_match, mmx_L0_new, mmx_R0_new; + int match; + int i=0; + + mmx_minus_one = _mm_set1_pi32(-1); + SrcPtrP -= 1; + + STAGE: + mmx_load0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP)); // La1 La0 Ra0 Ra1 --- --- --- --- + mmx_load1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lb1 Lb0 Rb0 Rb1 --- --- --- --- + mmx_load4 = _mm_unpacklo_pi8(mmx_load0, mmx_load1); // La1 Lb1 La0 Lb0 Ra0 Rb0 Ra1 Rb1 + mmx_load2 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lc1 Lc0 Rc0 Rc1 --- --- --- --- + mmx_load3 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Ld1 Ld0 Rd0 Rd1 --- --- --- --- + SrcPtrP+=p_step; + 
mmx_load5 = _mm_unpacklo_pi8(mmx_load2, mmx_load3); // Lc1 Ld1 Lc0 Ld0 Rc0 Rd0 Rc1 Rd1 + mmx_L1_L0 = _mm_unpacklo_pi16(mmx_load4, mmx_load5); // La1 Lb1 Lc1 Ld1 La0 Lb0 Lc0 Ld0 + mmx_R0_R1 = _mm_unpackhi_pi16(mmx_load4, mmx_load5); // Ra0 Rb0 Rc0 Rd0 Ra1 Rb1 Rc1 Rd1 + + // abs( R0 - L0 ) < Alpha + // MMX doesn't have unsigned compare, so we have to go to short + mmx_L0 = _mm_unpackhi_pi8(mmx_L1_L0, mmx_zero); // La0 Lb0 Lc0 Ld0 + mmx_R0 = _mm_unpacklo_pi8(mmx_R0_R1, mmx_zero); // Ra0 Rb0 Rc0 Rd0 + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0); + mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_match = _mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do + mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // abs( R0 - R1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_R0 already populated + mmx_R1 = _mm_unpackhi_pi8(mmx_R0_R1, mmx_zero); // Ra1 Rb1 Rc1 Rd1 + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1); + mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // abs(L0 - L1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_L0 already populated + mmx_L1 = _mm_unpacklo_pi8(mmx_L1_L0, mmx_zero); // La1 Lb1 Lc1 Ld1 + mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1); + mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = 
_mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // ok, now time to performn the actual calculation. hope it was worth it!! + + // L0 = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + mmx_L0_new = mmx_L1; + mmx_L0_new = _mm_slli_pi16(mmx_L0_new, 1); + mmx_L0_new = _mm_add_pi16(mmx_L0_new, mmx_L0); + mmx_L0_new = _mm_add_pi16(mmx_L0_new, mmx_R1); + mmx_L0_new = _mm_add_pi16(mmx_L0_new, mmx_two); + mmx_L0_new = _mm_srai_pi16(mmx_L0_new, 2); + mmx_L0_new = _mm_and_si64(mmx_L0_new, mmx_match); + mmx_L0 = _mm_andnot_si64(mmx_match, mmx_L0); + mmx_L0 = _mm_or_si64(mmx_L0, mmx_L0_new); + + // R0 = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + mmx_R0_new = mmx_R1; + mmx_R0_new = _mm_slli_pi16(mmx_R0_new, 1); + mmx_R0_new = _mm_add_pi16(mmx_R0_new, mmx_R0); + mmx_R0_new = _mm_add_pi16(mmx_R0_new, mmx_L1); + mmx_R0_new = _mm_add_pi16(mmx_R0_new, mmx_two); + mmx_R0_new = _mm_srai_pi16(mmx_R0_new, 2); + mmx_R0_new = _mm_and_si64(mmx_R0_new, mmx_match); + mmx_R0 = _mm_andnot_si64(mmx_match, mmx_R0); + mmx_R0 = _mm_or_si64(mmx_R0, mmx_R0_new); + + // now for the super-exciting fun of getting this data back into memory + SrcPtrP -= 4*p_step; + + // rotate 4x4 matrix + mmx_load1 = _mm_unpacklo_pi16(mmx_L1, mmx_R0); // 00 20 01 21 + mmx_load3 = _mm_unpackhi_pi16(mmx_L1, mmx_R0); // 02 22 03 23 + mmx_load2 = _mm_unpacklo_pi16(mmx_L0, mmx_R1); // 10 30 11 31 + mmx_load4 = _mm_unpackhi_pi16(mmx_L0, mmx_R1); // 12 32 13 33 + mmx_load5 = _mm_unpacklo_pi16(mmx_load1, mmx_load2); // 00 10 20 30 + mmx_load6 = _mm_unpackhi_pi16(mmx_load1, mmx_load2); // 01 11 21 31 + mmx_load7 = _mm_unpacklo_pi16(mmx_load3, mmx_load4); // 02 12 22 32 + mmx_load8 = _mm_unpackhi_pi16(mmx_load3, mmx_load4); // 03 13 23 33 + mmx_load5 = 
_mm_packs_pu16(mmx_load5, mmx_load5); + mmx_load6 = _mm_packs_pu16(mmx_load6, mmx_load6); + mmx_load7 = _mm_packs_pu16(mmx_load7, mmx_load7); + mmx_load8 = _mm_packs_pu16(mmx_load8, mmx_load8); + + //mmx_load1 = _mm_setr_pi16(0x8080, 0x80, 0, 0); + *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_load5); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load6); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load7); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load8); + + if (i++ == 1) + return; + + SrcPtrP += p_step; + goto STAGE; // next stage +} + +static void FilterChroma_Vert_sse(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const char *ClipTab) +{ + __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1); + __m64 mmx_zero = _mm_setzero_si64(), mmx_four=_mm_set1_pi16(4); + __m64 mmx_minus_one; + __m64 mmx_absdiff, mmx_diff; + __m64 mmx_L0, mmx_L1, mmx_L1_L0; + __m64 mmx_R0, mmx_R0_R1, mmx_R1; + __m64 mmx_load0, mmx_load1, mmx_load2, mmx_load3, mmx_load4, mmx_load5, mmx_load6, mmx_load7, mmx_load8; + __m64 mmx_C0, mmx_negative_C0, mmx_dif, mmx_match; + int match; + int i=0; + + mmx_minus_one = _mm_set1_pi32(-1); + SrcPtrP -= 1; + + STAGE: + + while (!Strength[i*2] && !Strength[i*2+1]) + { + SrcPtrP += p_step*4; + if (i++ == 1) // last stage + return; + } + + mmx_load0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP)); // La1 La0 Ra0 Ra1 --- --- --- --- + mmx_load1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lb1 Lb0 Rb0 Rb1 --- --- --- --- + mmx_load4 = _mm_unpacklo_pi8(mmx_load0, mmx_load1); // La1 Lb1 La0 Lb0 Ra0 Rb0 Ra1 Rb1 + mmx_load2 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lc1 Lc0 Rc0 Rc1 --- --- --- --- + mmx_load3 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Ld1 Ld0 Rd0 Rd1 --- --- --- --- + SrcPtrP+=p_step; + mmx_load5 = _mm_unpacklo_pi8(mmx_load2, mmx_load3); // Lc1 Ld1 Lc0 Ld0 Rc0 Rd0 Rc1 Rd1 + mmx_L1_L0 = _mm_unpacklo_pi16(mmx_load4, mmx_load5); // La1 Lb1 Lc1 Ld1 La0 Lb0 
Lc0 Ld0 + mmx_R0_R1 = _mm_unpackhi_pi16(mmx_load4, mmx_load5); // Ra0 Rb0 Rc0 Rd0 Ra1 Rb1 Rc1 Rd1 + + // abs( R0 - L0 ) < Alpha + // MMX doesn't have unsigned compare, so we have to go to short + mmx_L0 = _mm_unpackhi_pi8(mmx_L1_L0, mmx_zero); // La0 Lb0 Lc0 Ld0 + mmx_R0 = _mm_unpacklo_pi8(mmx_R0_R1, mmx_zero); // Ra0 Rb0 Rc0 Rd0 + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0); + mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_match = _mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do + mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // abs( R0 - R1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_R0 already populated + mmx_R1 = _mm_unpackhi_pi8(mmx_R0_R1, mmx_zero); // Ra1 Rb1 Rc1 Rd1 + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1); + mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // abs(L0 - L1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_L0 already populated + mmx_L1 = _mm_unpacklo_pi8(mmx_L1_L0, mmx_zero); // La1 Lb1 Lc1 Ld1 + mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1); + mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = 
_mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 1) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // ok, now time to performn the actual calculation. hope it was worth it!! + + // tc0 = ClipTab[ Strng ] + 1 + mmx_C0 = _mm_setr_pi16(ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2+1]]+1, ClipTab[Strength[i*2+1]]+1); + mmx_negative_C0 = _mm_sub_pi16(mmx_zero, mmx_C0); + + // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + mmx_dif = mmx_R0; + mmx_dif = _mm_sub_pi16(mmx_dif, mmx_L0); + mmx_dif = _mm_slli_pi16(mmx_dif, 2); + mmx_dif = _mm_add_pi16(mmx_dif, mmx_L1); + mmx_dif = _mm_sub_pi16(mmx_dif, mmx_R1); + mmx_dif = _mm_add_pi16(mmx_dif, mmx_four); + mmx_dif = _mm_srai_pi16(mmx_dif, 3); + mmx_dif = _mm_min_pi16(mmx_dif, mmx_C0); + mmx_dif = _mm_max_pi16(mmx_dif, mmx_negative_C0); + mmx_dif = _mm_and_si64(mmx_dif, mmx_match); + + // L0 = (imgpel) iClip1(max_imgpel_value, L0 + dif); + mmx_L0 = _mm_add_pi16(mmx_L0, mmx_dif); + + // R0 = (imgpel) iClip1(max_imgpel_value, R0 - dif); + mmx_R0 = _mm_sub_pi16(mmx_R0, mmx_dif); + + // now for the super-exciting fun of getting this data back into memory + SrcPtrP -= 4*p_step; + + // rotate 4x4 matrix + mmx_load1 = _mm_unpacklo_pi16(mmx_L1, mmx_R0); // 00 20 01 21 + mmx_load2 = _mm_unpacklo_pi16(mmx_L0, mmx_R1); // 10 30 11 31 + mmx_load3 = _mm_unpackhi_pi16(mmx_L1, mmx_R0); // 02 22 03 23 + mmx_load4 = _mm_unpackhi_pi16(mmx_L0, mmx_R1); // 12 32 13 33 + mmx_load5 = _mm_unpacklo_pi16(mmx_load1, mmx_load2); // 00 10 20 30 + mmx_load6 = _mm_unpackhi_pi16(mmx_load1, mmx_load2); // 01 11 21 31 + mmx_load7 = _mm_unpacklo_pi16(mmx_load3, mmx_load4); // 02 12 22 32 + mmx_load8 = _mm_unpackhi_pi16(mmx_load3, mmx_load4); // 03 13 23 33 + mmx_load5 = _mm_packs_pu16(mmx_load5, mmx_load5); + mmx_load6 = _mm_packs_pu16(mmx_load6, mmx_load6); + mmx_load7 = _mm_packs_pu16(mmx_load7, mmx_load7); + mmx_load8 = _mm_packs_pu16(mmx_load8, 
mmx_load8); + + *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_load5); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load6); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load7); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load8); + + if (i++ == 1) + return; + + SrcPtrP += p_step; + goto STAGE; // next stage +} + +static void FilterStrongChroma_Vert_c(int p_step, imgpel *SrcPtrP, int Alpha, int Beta) +{ + int i; + for (i=0;i<8;i++) + { + imgpel L0 = SrcPtrP[0]; + imgpel R0 = SrcPtrP[1]; + if ( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = SrcPtrP[2]; + if ( abs(R0 - R1) < Beta ) + { + imgpel L1 = SrcPtrP[-1]; + if ( abs(L0 - L1) < Beta ) + { + SrcPtrP[0] = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + SrcPtrP[1] = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + } + } + } + SrcPtrP+=p_step; + } +} + +static void FilterChroma_Vert_c(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const byte *ClipTab) +{ + int i; + for (i=0;i<8;i++) + { + if (Strength[i>>1]) + { + imgpel L0 = *SrcPtrP; + imgpel *SrcPtrQ = SrcPtrP + 1; + imgpel R0 = *SrcPtrQ; + + if ( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = *(SrcPtrQ + 1); + if ( abs(R0 - R1) < Beta ) + { + imgpel L1 = *(SrcPtrP - 1); + if ( abs(L0 - L1) < Beta ) + { + int tc0 = ClipTab[ Strength[(i*2)/4] ] * 1 + 1; + int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + *SrcPtrP = (imgpel) iClip1 ( 255, L0 + dif) ; + *SrcPtrQ = (imgpel) iClip1 ( 255, R0 - dif) ; + + } + } + } + } + SrcPtrP+=p_step; + } +} + +void EdgeLoopChroma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, int uv, PixelPos pixMB1, Macroblock *MbP) +{ + // dir == 0 + imgpel** Img = image->img; + + if (pixMB1.available || (MbQ->DFDisableIdc == 0)) + { + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + + // Average QP of the two blocks + int QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + int 
Alpha = ALPHA_TABLE[indexA]; + if (Alpha) + { + int indexB = iClip3(0, MAX_QP, QP + BetaOffset); + int Beta = BETA_TABLE [indexB]; + + if (Beta != 0) + { + const byte *ClipTab = CLIP_TAB[indexA]; + const int stride = image->stride; + imgpel *SrcPtrP = &(Img[pixMB1.pos_y >> 1][pixMB1.pos_x >> 1]); + + if (Strength[0] == 4) + { + FilterStrongChroma_Vert_sse(stride, SrcPtrP, Alpha, Beta); + } + else + { + FilterChroma_Vert_sse(stride, SrcPtrP, Alpha, Beta, Strength, ClipTab); + } + } + } + } +} + +void EdgeLoopChromaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p) +{ + // dir == 0 + imgpel** Img = image->img; + + int pel, Strng ; + int incP, incQ; + int C0, tc0, dif; + imgpel L0, R0; + int Alpha = 0, Beta = 0; + const byte* ClipTab = NULL; + int indexA, indexB; + VideoParameters *p_Vid = MbQ->p_Vid; + int StrengthIdx; + int QP; + int xQ, yQ; + PixelPos pixP, pixQ; + int bitdepth_scale = p_Vid->bitdepth_scale[IS_CHROMA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1]; + + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + byte fieldModeFilteringFlag; + Macroblock *MbP; + imgpel *SrcPtrP, *SrcPtrQ; + int width = image->stride; + + for( pel = 0 ; pel < 8 ; ++pel ) + { + xQ = edge; + yQ = pel; + getAffNeighbour(MbQ, xQ, yQ, p_Vid->mb_size[IS_CHROMA], &pixQ); + getAffNeighbour(MbQ, xQ - 1, yQ, p_Vid->mb_size[IS_CHROMA], &pixP); + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + StrengthIdx = ((MbQ->mb_field && !MbP->mb_field) ? 
pel << 1 :((pel >> 1) << 2) + (pel & 0x01)); + + if (pixP.available || (MbQ->DFDisableIdc == 0)) + { + if( (Strng = Strength[StrengthIdx]) != 0) + { + fieldModeFilteringFlag = (byte) (MbQ->mb_field || MbP->mb_field); + incQ = 1; + incP = 1; + SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + + // Average QP of the two blocks + QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1; + + indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + Beta = BETA_TABLE [indexB] * bitdepth_scale; + ClipTab = CLIP_TAB[indexA]; + + + L0 = SrcPtrP[0] ; + R0 = SrcPtrQ[0] ; + + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel L1 = SrcPtrP[-incP]; + imgpel R1 = SrcPtrQ[ incQ]; + //if( ((abs( R0 - R1) - Beta ) & (abs(L0 - L1) - Beta )) < 0 ) + if( ((abs( R0 - R1) - Beta < 0) && (abs(L0 - L1) - Beta < 0 )) ) + { + if( Strng == 4 ) // INTRA strong filtering + { + SrcPtrQ[0] = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + SrcPtrP[0] = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + } + else + { + C0 = ClipTab[ Strng ] * bitdepth_scale; + tc0 = (C0 + 1); + dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + SrcPtrP[0] = (imgpel) iClip1 ( max_imgpel_value, L0 + dif ); + SrcPtrQ[0] = (imgpel) iClip1 ( max_imgpel_value, R0 - dif ); + } + } + } + } + } + } +} diff --git a/Src/h264dec/ldecod/src/filter_luma_horiz.c b/Src/h264dec/ldecod/src/filter_luma_horiz.c new file mode 100644 index 00000000..97438dfb --- /dev/null +++ b/Src/h264dec/ldecod/src/filter_luma_horiz.c @@ -0,0 +1,871 @@ +#include "global.h" +#include "image.h" +#include "mb_access.h" +#include "loopfilter.h" +#include <mmintrin.h> +#include <emmintrin.h> + +static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; +static const byte BETA_TABLE[52] = 
{0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; +static const byte CLIP_TAB[52][5] = +{ + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1}, + { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3}, + { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6}, + { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16}, + { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25} +} ; + +// benski> used for unit testing, not in production code +static int CalculateMatches(int inc_dim, const imgpel *SrcPtrP, const imgpel *SrcPtrQ, int Alpha, int Beta) +{ + int match=0; + const imgpel *P_L1 = SrcPtrP - inc_dim; + const imgpel *Q_R1 = SrcPtrQ + inc_dim; + + + int pel; + for (pel = 0; pel < BLOCK_SIZE; pel++, SrcPtrP++, SrcPtrQ++, Q_R1++, P_L1++) + { + imgpel L0 = *SrcPtrP; + imgpel R0 = *SrcPtrQ; + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = *Q_R1; + if ((abs( R0 - R1) < Beta)) + { + imgpel L1 = *P_L1; + if ((abs(L0 - L1) < Beta)) + { + match |= (1 << (pel*2)); + match |= (1 << (pel*2+1)); + } + } + } + } + return match; +} + +static void IntraStrongFilter_Luma_Horiz(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta) +{ + + imgpel *P_L1 = SrcPtrP - inc_dim; + imgpel *P_L2 = P_L1 - inc_dim; + const imgpel *P_L3 = P_L2 - inc_dim; + + imgpel *Q_R1 = 
SrcPtrQ + inc_dim; + imgpel *Q_R2 = Q_R1 + inc_dim; + const imgpel *Q_R3 = Q_R2 + inc_dim; + + + int pel; + for (pel = 0; pel < BLOCK_SIZE; pel++, SrcPtrP++, SrcPtrQ++, Q_R1++, P_L1++, Q_R2++, P_L2++, Q_R3++, P_L3++) + { + imgpel L0 = *SrcPtrP; + imgpel R0 = *SrcPtrQ; + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = *Q_R1; + if ((abs( R0 - R1) < Beta)) + { + imgpel L1 = *P_L1; + if ((abs(L0 - L1) < Beta)) + { + imgpel R2 = *Q_R2; + imgpel L2 = *P_L2; + + int RL0 = L0 + R0; + int small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2)); + int aq = ( abs( R0 - R2) < Beta ) & small_gap; + int ap = ( abs( L0 - L2) < Beta ) & small_gap; + + if (ap) + { + int L1RL0 = L1 + RL0; + imgpel L3 = *P_L3; + *SrcPtrP = (imgpel) (( R1 + ((L1RL0) << 1) + L2 + 4) >> 3); + *P_L1 = (imgpel) (( L2 + L1RL0 + 2) >> 2); + *P_L2 = (imgpel) ((((L3 + L2) <<1) + L2 + L1RL0 + 4) >> 3); + } + else + { + *SrcPtrP = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ; + } + + if (aq) + { + imgpel R3 = *Q_R3; + *(SrcPtrQ ) = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3); + *Q_R1 = (imgpel) (( R2 + R0 + L0 + R1 + 2) >> 2); + *Q_R2 = (imgpel) ((((R3 + R2) <<1) + R2 + R1 + RL0 + 4) >> 3); + } + else + { + *SrcPtrQ = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2); + } + } + } + } + } +} + +#define LOAD_LINE_EPI16(reg, ptr) { reg = _mm_loadl_epi64((__m128i *)(ptr)); reg = _mm_unpacklo_epi8(reg, xmm_zero); } +static void IntraStrongFilter_Luma_Horiz_sse2(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, __m128i xmm_alpha, __m128i xmm_beta, __m128i xmm_match) +{ + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_smallgap; + __m128i xmm_ap, xmm_aq; + __m128i xmm_L3, xmm_L2, xmm_L1, xmm_L0, xmm_R0, xmm_R1, xmm_R2, xmm_R3; + __m128i xmm_4 = _mm_set1_epi16(4), xmm_2 = _mm_set1_epi16(2); + __m128i xmm_add, xmm_add2, xmm_acc, xmm_match_and_an; + __m128i xmm_absdiff, xmm_diff; + + LOAD_LINE_EPI16(xmm_L0, SrcPtrP); + LOAD_LINE_EPI16(xmm_R0, SrcPtrQ); + + // small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2)); + xmm_alpha = 
_mm_srai_epi16(xmm_alpha, 2); + xmm_alpha = _mm_add_epi16(xmm_alpha, xmm_2); + xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0); + xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_R0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_smallgap = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha); + + LOAD_LINE_EPI16(xmm_R2, SrcPtrQ + 2*inc_dim); + + // (abs(R0 - R2) < Beta) & small_gap; + xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R2); + xmm_absdiff =_mm_subs_epu16(xmm_R2, xmm_R0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_aq = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_aq = _mm_and_si128(xmm_aq, xmm_smallgap); + + LOAD_LINE_EPI16(xmm_L2, SrcPtrP - 2*inc_dim); + + // (abs(L0 - L2) < Beta) & small_gap; + xmm_diff=_mm_subs_epu16(xmm_L0, xmm_L2); + xmm_absdiff =_mm_subs_epu16(xmm_L2, xmm_L0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_ap = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_ap = _mm_and_si128(xmm_ap, xmm_smallgap); + + LOAD_LINE_EPI16(xmm_L1, SrcPtrP - inc_dim); + LOAD_LINE_EPI16(xmm_R1, SrcPtrQ + inc_dim); + LOAD_LINE_EPI16(xmm_L3, SrcPtrP - 3*inc_dim); + LOAD_LINE_EPI16(xmm_R3, SrcPtrQ + 3*inc_dim); + + xmm_match_and_an=_mm_and_si128(xmm_match, xmm_ap); + + // if(ap) SrcPtrP = (imgpel) (( R1 + ((L1 + L0 + R0) << 1) + L2 + 4) >> 3) + xmm_add = xmm_L1; + xmm_add = _mm_add_epi16(xmm_add, xmm_L0); + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_slli_epi16(xmm_add, 1); + xmm_add = _mm_add_epi16(xmm_add, xmm_R1); + xmm_add = _mm_add_epi16(xmm_add, xmm_L2); + xmm_add = _mm_add_epi16(xmm_add, xmm_4); + xmm_add = _mm_srai_epi16(xmm_add, 3); + xmm_acc = _mm_and_si128(xmm_add, xmm_match_and_an); + + // if (ap) *P_L1 = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2); + xmm_add = xmm_L2; + xmm_add = _mm_add_epi16(xmm_add, xmm_L1); + xmm_add = _mm_add_epi16(xmm_add, xmm_L0); + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_add_epi16(xmm_add, xmm_2); + xmm_add = _mm_srai_epi16(xmm_add, 2); + xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an); + 
xmm_add2= xmm_L1; + xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2); + xmm_add=_mm_add_epi16(xmm_add, xmm_add2); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + _mm_storel_epi64((__m128i *)(SrcPtrP-inc_dim), xmm_add); + + + // if (ap) *P_L2 = (imgpel) ((((L3 + L2) <<1) + L2 + L1 + L0 + R0 + 4) >> 3); + xmm_add = xmm_L3; + xmm_add = _mm_add_epi16(xmm_add, xmm_L2); + xmm_add = _mm_slli_epi16(xmm_add, 1); + xmm_add = _mm_add_epi16(xmm_add, xmm_L2); + xmm_add = _mm_add_epi16(xmm_add, xmm_L1); + xmm_add = _mm_add_epi16(xmm_add, xmm_L0); + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_add_epi16(xmm_add, xmm_4); + xmm_add = _mm_srai_epi16(xmm_add, 3); + xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an); + xmm_add2= xmm_L2; + xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2); + xmm_add=_mm_add_epi16(xmm_add, xmm_add2); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + _mm_storel_epi64((__m128i *)(SrcPtrP-2*inc_dim), xmm_add); + + // if (!ap) *SrcPtrP = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ; + xmm_add = xmm_L1; + xmm_add = _mm_slli_epi16(xmm_add, 1); + xmm_add = _mm_add_epi16(xmm_add, xmm_L0); + xmm_add = _mm_add_epi16(xmm_add, xmm_R1); + xmm_add = _mm_add_epi16(xmm_add, xmm_2); + xmm_add = _mm_srai_epi16(xmm_add, 2); + xmm_add = _mm_and_si128(xmm_add, xmm_match); + xmm_add = _mm_andnot_si128(xmm_ap, xmm_add); + xmm_add2= xmm_L0; + //xmm_match_and_an=_mm_or_si128(xmm_match, xmm_ap); + xmm_add2=_mm_andnot_si128(xmm_match, xmm_add2); + xmm_add = _mm_add_epi16(xmm_add, xmm_add2); + xmm_add = _mm_add_epi16(xmm_add, xmm_acc); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_add); + + xmm_match_and_an=_mm_and_si128(xmm_match, xmm_aq); + + // if (aq) *(SrcPtrQ ) = (imgpel) (( L1 + ((R1 + L0 + R0) << 1) + R2 + 4) >> 3); + xmm_add = xmm_R1; + xmm_add = _mm_add_epi16(xmm_add, xmm_L0); + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_slli_epi16(xmm_add, 1); + xmm_add = _mm_add_epi16(xmm_add, 
xmm_L1); + xmm_add = _mm_add_epi16(xmm_add, xmm_R2); + xmm_add = _mm_add_epi16(xmm_add, xmm_4); + xmm_add = _mm_srai_epi16(xmm_add, 3); + xmm_acc = _mm_and_si128(xmm_add, xmm_match_and_an); + + // if (aq) *Q_R1 = (imgpel) (( R2 + R0 + L0 + R1 + 2) >> 2); + xmm_add = xmm_R2; + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_add_epi16(xmm_add, xmm_L0); + xmm_add = _mm_add_epi16(xmm_add, xmm_R1); + xmm_add = _mm_add_epi16(xmm_add, xmm_2); + xmm_add = _mm_srai_epi16(xmm_add, 2); + xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an); + xmm_add2= xmm_R1; + xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2); + xmm_add=_mm_add_epi16(xmm_add, xmm_add2); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + _mm_storel_epi64((__m128i *)(SrcPtrQ+inc_dim), xmm_add); + + // if (aq) *Q_R2 = (imgpel) ((((R3 + R2) <<1) + R2 + R1 + L0 + R0 + 4) >> 3); + xmm_add = xmm_R3; + xmm_add = _mm_add_epi16(xmm_add, xmm_R2); + xmm_add = _mm_slli_epi16(xmm_add, 1); + xmm_add = _mm_add_epi16(xmm_add, xmm_R2); + xmm_add = _mm_add_epi16(xmm_add, xmm_R1); + xmm_add = _mm_add_epi16(xmm_add, xmm_L0); + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_add_epi16(xmm_add, xmm_4); + xmm_add = _mm_srai_epi16(xmm_add, 3); + xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an); + xmm_add2= xmm_R2; + xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2); + xmm_add=_mm_add_epi16(xmm_add, xmm_add2); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + + _mm_storel_epi64((__m128i *)(SrcPtrQ+2*inc_dim), xmm_add); + + // if (!aq) *SrcPtrQ = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2); + xmm_add = xmm_R1; + xmm_add = _mm_slli_epi16(xmm_add, 1); + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_add_epi16(xmm_add, xmm_L1); + xmm_add = _mm_add_epi16(xmm_add, xmm_2); + xmm_add = _mm_srai_epi16(xmm_add, 2); + xmm_add = _mm_and_si128(xmm_add, xmm_match); + xmm_add = _mm_andnot_si128(xmm_aq, xmm_add); + xmm_add2= xmm_R0; + //xmm_match_and_an=_mm_or_si128(xmm_match, xmm_aq); + 
xmm_add2=_mm_andnot_si128(xmm_match, xmm_add2); + xmm_add = _mm_add_epi16(xmm_add, xmm_add2); + xmm_add = _mm_add_epi16(xmm_add, xmm_acc); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + + _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_add); + + +} + +// benski> for some reason, Visual Studio 2008 only allows for 3 __m128i parameters, or else we'd pass a whole lot more for optimization reasons +// we could put this function straight into EdgeLoopLumaNormal_Horiz_sse2 if we think it's worth it +static void FilterLuma_Horiz_sse2(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, __m128i xmm_beta, int C0[2], __m128i xmm_match) +{ + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_C0 = _mm_setr_epi16(C0[0], C0[0], C0[0], C0[0], C0[1], C0[1], C0[1], C0[1]); // TODO: benski> probably a better way to do this. + __m128i xmm_negative_C0; + __m128i xmm_tc0; + __m128i xmm_L2, xmm_L1, xmm_L0, xmm_R0, xmm_R1, xmm_R2; + __m128i xmm_absdiff, xmm_diff; + __m128i xmm_dif; + __m128i xmm_4 = _mm_set1_epi16(4), xmm_1 = _mm_set1_epi16(1); + __m128i xmm_add; + __m128i xmm_ap, xmm_aq; + + xmm_negative_C0 = _mm_sub_epi16(xmm_zero, xmm_C0); + xmm_tc0 = xmm_C0; + + xmm_R2 = _mm_loadl_epi64((__m128i *)(SrcPtrQ + 2*inc_dim)); + xmm_R2 = _mm_unpacklo_epi8(xmm_R2, xmm_zero); + + xmm_R0 = _mm_loadl_epi64((__m128i *)(SrcPtrQ)); + xmm_R0 = _mm_unpacklo_epi8(xmm_R0, xmm_zero); + + // (abs(R0 - R2) < Beta); + xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R2); + xmm_absdiff =_mm_subs_epu16(xmm_R2, xmm_R0); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_aq = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_add = _mm_srli_epi16(xmm_aq, 15); // convert 0xFFFF to 1 and 0x0000 to 0 + xmm_tc0 = _mm_adds_epu16(xmm_tc0, xmm_add); // tc0 = (C0 + ap + aq) ; + + xmm_L2 = _mm_loadl_epi64((__m128i *)(SrcPtrP - 2*inc_dim)); + xmm_L2 = _mm_unpacklo_epi8(xmm_L2, xmm_zero); + + xmm_L0 = _mm_loadl_epi64((__m128i *)(SrcPtrP)); + xmm_L0 = _mm_unpacklo_epi8(xmm_L0, xmm_zero); + + // (abs(L0 - L2) < Beta); + 
xmm_diff=_mm_subs_epu16(xmm_L2, xmm_L0); + xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_L2); + xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); + xmm_ap = _mm_cmplt_epi16(xmm_absdiff, xmm_beta); + xmm_add = _mm_srli_epi16(xmm_ap, 15); // convert 0xFFFF to 1 and 0x0000 to 0 + xmm_tc0 = _mm_adds_epu16(xmm_tc0, xmm_add); // tc0 = (C0 + ap + aq) ; + + xmm_L1 = _mm_loadl_epi64((__m128i *)(SrcPtrP - inc_dim)); + xmm_L1 = _mm_unpacklo_epi8(xmm_L1, xmm_zero); + + xmm_R1 = _mm_loadl_epi64((__m128i *)(SrcPtrQ + inc_dim)); + xmm_R1 = _mm_unpacklo_epi8(xmm_R1, xmm_zero); + + // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + L1 - R1 + 4) >> 3 ); + xmm_dif = xmm_R0; + xmm_dif = _mm_sub_epi16(xmm_dif, xmm_L0); + xmm_dif = _mm_slli_epi16(xmm_dif, 2); + xmm_dif = _mm_add_epi16(xmm_dif, xmm_L1); + xmm_dif = _mm_sub_epi16(xmm_dif, xmm_R1); + xmm_dif = _mm_add_epi16(xmm_dif, xmm_4); + xmm_dif = _mm_srai_epi16(xmm_dif, 3); + xmm_dif = _mm_min_epi16(xmm_dif, xmm_tc0); + xmm_tc0 = _mm_sub_epi16(xmm_zero, xmm_tc0); + xmm_dif = _mm_max_epi16(xmm_dif, xmm_tc0); + xmm_dif = _mm_and_si128(xmm_dif, xmm_match); + + // if( ap ) *P_L1 += iClip3( -C0, C0, (L2 + ((L0 + R0 + 1) >> 1) - (L1<<1)) >> 1 ); + xmm_add = xmm_L0; + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_add_epi16(xmm_add, xmm_1); + xmm_add = _mm_srai_epi16(xmm_add, 1); + xmm_add = _mm_sub_epi16(xmm_add, xmm_L1); + xmm_add = _mm_sub_epi16(xmm_add, xmm_L1); + xmm_add = _mm_add_epi16(xmm_add, xmm_L2); + xmm_add = _mm_srai_epi16(xmm_add, 1); + xmm_add = _mm_min_epi16(xmm_add, xmm_C0); + xmm_add = _mm_max_epi16(xmm_add, xmm_negative_C0); + xmm_add = _mm_and_si128(xmm_add, xmm_ap); + xmm_add = _mm_and_si128(xmm_add, xmm_match); + xmm_add = _mm_add_epi16(xmm_add, xmm_L1); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + _mm_storel_epi64((__m128i *)(SrcPtrP-inc_dim), xmm_add); + + // *SrcPtrP = (imgpel) iClip1(max_imgpel_value, L0 + dif); + xmm_add = _mm_add_epi16(xmm_dif, xmm_L0); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + 
_mm_storel_epi64((__m128i *)(SrcPtrP), xmm_add); + + // *SrcPtrQ = (imgpel) iClip1(max_imgpel_value, R0 - dif); + xmm_add = _mm_sub_epi16(xmm_R0, xmm_dif); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_add); + + // if (aq) *Q_R1 += iClip3( -C0, C0, (R2 + ((L0 + R0 + 1) >> 1) - (R1<<1)) >> 1 ); + xmm_add = xmm_L0; + xmm_add = _mm_add_epi16(xmm_add, xmm_R0); + xmm_add = _mm_add_epi16(xmm_add, xmm_1); + xmm_add = _mm_srai_epi16(xmm_add, 1); + xmm_add = _mm_sub_epi16(xmm_add, xmm_R1); + xmm_add = _mm_sub_epi16(xmm_add, xmm_R1); + xmm_add = _mm_add_epi16(xmm_add, xmm_R2); + xmm_add = _mm_srai_epi16(xmm_add, 1); + xmm_add = _mm_min_epi16(xmm_add, xmm_C0); + xmm_add = _mm_max_epi16(xmm_add, xmm_negative_C0); + xmm_add = _mm_and_si128(xmm_add, xmm_aq); + xmm_add = _mm_and_si128(xmm_add, xmm_match); + xmm_add = _mm_add_epi16(xmm_add, xmm_R1); + xmm_add = _mm_packus_epi16(xmm_add, xmm_add); + _mm_storel_epi64((__m128i *)(SrcPtrQ+inc_dim), xmm_add); +} + +static void FilterLuma_Horiz(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta, int C0, int max_imgpel_value) +{ + imgpel *P_L1 = SrcPtrP - inc_dim; + const imgpel *P_L2 = P_L1 - inc_dim; + imgpel *Q_R1 = SrcPtrQ + inc_dim; + const imgpel *Q_R2 = Q_R1 + inc_dim; + + int pel; + for (pel = 0; pel < BLOCK_SIZE; pel++, SrcPtrP++, SrcPtrQ++, Q_R1++, P_L1++, Q_R2++, P_L2++) + { + imgpel L0 = *SrcPtrP; + imgpel R0 = *SrcPtrQ; + if( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = *Q_R1; + if (abs( R0 - R1) < Beta) + { + imgpel L1 = *P_L1; + if (abs(L0 - L1) < Beta) + { + imgpel R2 = *Q_R2; + imgpel L2 = *P_L2; + + int RL0 = (L0 + R0 + 1) >> 1; + int aq = (abs(R0 - R2) < Beta); + int ap = (abs(L0 - L2) < Beta); + + //int C0 = ClipTab[ strength ] * bitdepth_scale; + int tc0 = (C0 + ap + aq) ; + int dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + if( ap ) + *P_L1 += iClip3( -C0, C0, (L2 + RL0 - (L1<<1)) >> 1 ); + *SrcPtrP = (imgpel) 
/* benski> this exists for unit testing, not used in production code */
/*
 * Evaluates the three deblocking activity tests for one group of pixel columns:
 *   abs(R0 - L0) < Alpha,  abs(R0 - R1) < Beta,  abs(L0 - L1) < Beta
 *
 * inc_dim    - offset between one pixel row and the next
 * SrcPtrP    - P-side row touching the edge (L0); L1 is read at SrcPtrP - inc_dim
 * SrcPtrQ    - Q-side row touching the edge (R0); R1 is read at SrcPtrQ + inc_dim
 * Alpha/Beta - thresholds, replicated into every 16-bit lane
 * xmm_result - receives the combined 16-bit lane mask; written ONLY when the
 *              return value is non-zero
 *
 * Returns the _mm_movemask_epi8 bit pattern of the combined mask, or 0 as soon
 * as one of the tests leaves no lane set.
 *
 * NOTE(review): LOAD_LINE_EPI16 is defined outside this chunk; it presumably
 * loads a row and widens it to 16-bit lanes, and may reference xmm_zero by
 * name - confirm before renaming or removing that local.
 */
static int CalculateMatches_sse2(int inc_dim, const imgpel *SrcPtrP, const imgpel *SrcPtrQ, int Alpha, int Beta, __m128i *xmm_result)
{
  int match;
  __m128i xmm_L0, xmm_R0, xmm_R1, xmm_L1;
  __m128i xmm_absdiff, xmm_diff, xmm_alpha, xmm_beta, xmm_zero, xmm_strength;

  xmm_zero = _mm_setzero_si128();
  xmm_alpha = _mm_set1_epi16((uint16_t)Alpha);
  xmm_beta= _mm_set1_epi16((uint16_t)Beta);

  // abs( R0 - L0 )
  LOAD_LINE_EPI16(xmm_L0, SrcPtrP);
  LOAD_LINE_EPI16(xmm_R0, SrcPtrQ);

  // one of the two saturating differences is always zero, so OR-ing them gives |R0 - L0|
  xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0);
  xmm_absdiff=_mm_subs_epu16(xmm_L0, xmm_R0);
  xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);

  // if( abs( R0 - L0 ) < Alpha )
  xmm_strength = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha);
  match = _mm_movemask_epi8(xmm_strength);
  if (match == 0)
    return 0;

  // abs(R0 - R1)
  LOAD_LINE_EPI16(xmm_R1, SrcPtrQ+inc_dim);
  xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R1);
  xmm_absdiff=_mm_subs_epu16(xmm_R1, xmm_R0);
  xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);

  // if (abs( R0 - R1) < Beta)
  xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
  xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff);
  match = _mm_movemask_epi8(xmm_strength);
  if (match == 0)
    return 0;

  // abs(L0 - L1)
  LOAD_LINE_EPI16(xmm_L1, SrcPtrP-inc_dim);
  xmm_diff=_mm_subs_epu16(xmm_L1, xmm_L0);
  xmm_absdiff=_mm_subs_epu16(xmm_L0, xmm_L1);
  xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);

  // if ((abs(L0 - L1) < Beta))
  xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
  xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff);
  match = _mm_movemask_epi8(xmm_strength);
  if (match == 0)
    return 0;

  // at least one lane passed all three tests: hand the lane mask back to the caller
  *xmm_result = xmm_strength;
  return match;
}
EdgeLoopLumaNormal_Horiz_sse2(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p) +{ + // dir == 1 + __m128i xmm_L0, xmm_R0, xmm_R1, xmm_L1; + __m128i xmm_absdiff, xmm_diff, xmm_alpha, xmm_beta, xmm_comphi, xmm_complo, xmm_zero, xmm_127; + __m128i xmm_strength; + VideoParameters *p_Vid = MbQ->p_Vid; + int yQ = (edge < MB_BLOCK_SIZE ? edge - 1: 0); + int pelmatch; + + PixelPos pixMB1; + p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_LUMA], &pixMB1); + + if (pixMB1.available || (MbQ->DFDisableIdc== 0)) + { + int bitdepth_scale = pl ? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA]; + + Macroblock *MbP = &(p_Vid->mb_data[pixMB1.mb_addr]); + + // Average QP of the two blocks + int QP = pl? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset); + int Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + if (Alpha) + { + int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset); + int Beta = BETA_TABLE [indexB] * bitdepth_scale; + + if (Beta !=0) + { + int match; + PixelPos pixMB2; + const byte *ClipTab = CLIP_TAB [indexA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + int inc_dim = image->stride; + imgpel *SrcPtrQ; + imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x; + + p_Vid->getNeighbour0X(MbQ, yQ+1, p_Vid->mb_size[IS_LUMA], &pixMB2); + SrcPtrQ = image->base_address + pixMB2.pos_y * image->stride + pixMB2.pos_x; + + xmm_strength = _mm_load_si128((__m128i *)Strength); + xmm_127 = _mm_set1_epi8(127); + xmm_strength = _mm_adds_epu8(xmm_strength, xmm_127); + xmm_strength = _mm_srai_epi16(xmm_strength, 15); // shift so it's all 0xFFFF or 0x0000 + + // abs( R0 - L0 ) + xmm_R0 = _mm_loadu_si128((__m128i *)SrcPtrQ); + xmm_L0 = _mm_loadu_si128((__m128i *)SrcPtrP); + xmm_diff=_mm_subs_epu8(xmm_R0, xmm_L0); + xmm_absdiff=_mm_subs_epu8(xmm_L0, xmm_R0); + 
xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff); + + // if( abs( R0 - L0 ) < Alpha ) + // SSE2 doesn't have unsigned <, so we have to go to short + xmm_zero = _mm_setzero_si128(); + xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero); + xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero); + xmm_alpha = _mm_set1_epi16((uint16_t)Alpha); + xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_alpha); + xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_alpha); + xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); + xmm_strength = _mm_and_si128(xmm_strength, xmm_complo); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + // abs(R0 - R1) + xmm_R1 = _mm_loadu_si128((__m128i *)(SrcPtrQ+inc_dim)); + xmm_diff=_mm_subs_epu8(xmm_R0, xmm_R1); + xmm_absdiff=_mm_subs_epu8(xmm_R1, xmm_R0); + xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff); + + // if (abs( R0 - R1) < Beta) + // SSE2 doesn't have unsigned <, so we have to go to short + xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero); + xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero); + xmm_beta= _mm_set1_epi16((uint16_t)Beta); + xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta); + xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta); + xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); + xmm_strength = _mm_and_si128(xmm_strength, xmm_complo); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + // abs(L0 - L1) + xmm_L1 = _mm_loadu_si128((__m128i *)(SrcPtrP-inc_dim)); + xmm_diff=_mm_subs_epu8(xmm_L1, xmm_L0); + xmm_absdiff=_mm_subs_epu8(xmm_L0, xmm_L1); + xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff); + + // if ((abs(L0 - L1) < Beta)) + // SSE2 doesn't have unsigned <, so we have to go to short + xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero); + xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero); + xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta); + xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta); + xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); + 
xmm_strength = _mm_and_si128(xmm_strength, xmm_complo); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + pelmatch = match & 0xFF; + if (pelmatch) + { + byte strength = Strength[0]; + + xmm_complo = _mm_unpacklo_epi8(xmm_strength, xmm_strength); + + switch(strength) + { + case 4: // INTRA strong + { + assert(Strength[4] == 4); + IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_alpha, xmm_beta, xmm_complo); + } + break; + default: + { + int C[2] = { ClipTab[strength] * bitdepth_scale, ClipTab[Strength[4]] * bitdepth_scale }; + FilterLuma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_beta, C, xmm_complo); + } + break; + } + } + pelmatch = match & 0xFF00; + if (pelmatch) + { + byte strength = Strength[8]; + + xmm_comphi = _mm_unpackhi_epi8(xmm_strength, xmm_strength); + + switch(strength) + { + case 4: // INTRA strong + { + assert(Strength[12] == 4); + IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_alpha, xmm_beta, xmm_comphi); + } + break; + default: + { + int C[2] = { ClipTab[strength] * bitdepth_scale, ClipTab[Strength[12]] * bitdepth_scale }; + FilterLuma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_beta, C, xmm_comphi); + } + break; + } + } + } + } + } +} + + +void EdgeLoopLuma_Horiz_YUV420(VideoImage *image, const byte strength[4], Macroblock *MbQ, PixelPos pixMB1, Macroblock *MbP) +{ + // dir == 1 + __m128i xmm_L0, xmm_R0, xmm_R1, xmm_L1; + __m128i xmm_absdiff, xmm_diff, xmm_alpha, xmm_beta, xmm_comphi, xmm_complo, xmm_zero, xmm_127; + __m128i xmm_strength; + VideoParameters *p_Vid = MbQ->p_Vid; + int pelmatch; + int i; +__declspec(align(32)) uint8_t Strength[16]; + + for (i=0;i<16;i++) + { +Strength[i] = strength[i/4]; + } + + if (pixMB1.available || (MbQ->DFDisableIdc== 0)) + { + // Average QP of the two blocks + int QP = (MbP->qp + MbQ->qp + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset); + int Alpha = ALPHA_TABLE[indexA]; + if (Alpha) + { + int indexB = iClip3(0, MAX_QP, QP 
+ MbQ->DFBetaOffset); + int Beta = BETA_TABLE [indexB]; + + if (Beta !=0) + { + int match; + const byte *ClipTab = CLIP_TAB [indexA]; + int inc_dim = image->stride; + + imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * inc_dim + pixMB1.pos_x; + imgpel *SrcPtrQ = SrcPtrP + inc_dim; + + xmm_strength = _mm_load_si128((__m128i *)Strength); + xmm_127 = _mm_set1_epi8(127); + xmm_strength = _mm_adds_epu8(xmm_strength, xmm_127); + xmm_strength = _mm_srai_epi16(xmm_strength, 15); // shift so it's all 0xFFFF or 0x0000 + + // abs( R0 - L0 ) + xmm_L0 = _mm_loadu_si128((__m128i *)SrcPtrP); + xmm_R0 = _mm_loadu_si128((__m128i *)SrcPtrQ); + xmm_diff=_mm_subs_epu8(xmm_R0, xmm_L0); + xmm_absdiff=_mm_subs_epu8(xmm_L0, xmm_R0); + xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff); + + // if( abs( R0 - L0 ) < Alpha ) + // SSE2 doesn't have unsigned <, so we have to go to short + xmm_zero = _mm_setzero_si128(); + xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero); + xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero); + xmm_alpha = _mm_set1_epi16((uint16_t)Alpha); + xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_alpha); + xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_alpha); + xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); + xmm_strength = _mm_and_si128(xmm_strength, xmm_complo); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + // abs(R0 - R1) + xmm_R1 = _mm_loadu_si128((__m128i *)(SrcPtrQ+inc_dim)); + xmm_diff=_mm_subs_epu8(xmm_R0, xmm_R1); + xmm_absdiff=_mm_subs_epu8(xmm_R1, xmm_R0); + xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff); + + // if (abs( R0 - R1) < Beta) + // SSE2 doesn't have unsigned <, so we have to go to short + xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero); + xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero); + xmm_beta= _mm_set1_epi16((uint16_t)Beta); + xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta); + xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta); + xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); + 
xmm_strength = _mm_and_si128(xmm_strength, xmm_complo); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + // abs(L0 - L1) + xmm_L1 = _mm_loadu_si128((__m128i *)(SrcPtrP-inc_dim)); + xmm_diff=_mm_subs_epu8(xmm_L1, xmm_L0); + xmm_absdiff=_mm_subs_epu8(xmm_L0, xmm_L1); + xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff); + + // if ((abs(L0 - L1) < Beta)) + // SSE2 doesn't have unsigned <, so we have to go to short + xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero); + xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero); + xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta); + xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta); + xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); + xmm_strength = _mm_and_si128(xmm_strength, xmm_complo); + match = _mm_movemask_epi8(xmm_strength); + if (match == 0) + return; + + pelmatch = match & 0xFF; + if (pelmatch) + { + byte strength = Strength[0]; + + xmm_complo = _mm_unpacklo_epi8(xmm_strength, xmm_strength); + + switch(strength) + { + case 4: // INTRA strong + { + assert(Strength[4] == 4); + IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_alpha, xmm_beta, xmm_complo); + } + break; + default: + { + int C[2] = { ClipTab[strength], ClipTab[Strength[4]] }; + FilterLuma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_beta, C, xmm_complo); + } + break; + } + } + pelmatch = match & 0xFF00; + if (pelmatch) + { + byte strength = Strength[8]; + + xmm_comphi = _mm_unpackhi_epi8(xmm_strength, xmm_strength); + + switch(strength) + { + case 4: // INTRA strong + { + assert(Strength[12] == 4); + IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_alpha, xmm_beta, xmm_comphi); + } + break; + default: + { + int C[2] = { ClipTab[strength], ClipTab[Strength[12]] }; + FilterLuma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_beta, C, xmm_comphi); + } + break; + } + } + } + } + } +} + + +void EdgeLoopLumaNormal_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], 
Macroblock *MbQ, int edge, StorablePicture *p) +{ + // dir == 1 + VideoParameters *p_Vid = MbQ->p_Vid; + int yQ = (edge < MB_BLOCK_SIZE ? edge - 1: 0); + + PixelPos pixMB1; + p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_LUMA], &pixMB1); + + if (pixMB1.available || (MbQ->DFDisableIdc== 0)) + { + int bitdepth_scale = pl ? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA]; + + Macroblock *MbP = &(p_Vid->mb_data[pixMB1.mb_addr]); + + // Average QP of the two blocks + int QP = pl? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset); + int Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + if (Alpha) + { + int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset); + int Beta = BETA_TABLE [indexB] * bitdepth_scale; + + if (Beta !=0) + { + PixelPos pixMB2; + const byte *ClipTab = CLIP_TAB [indexA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + int inc_dim = image->stride; + int pel; + imgpel *SrcPtrQ; + imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x; + + p_Vid->getNeighbour0X(MbQ, ++yQ, p_Vid->mb_size[IS_LUMA], &pixMB2); + SrcPtrQ = image->base_address + pixMB2.pos_y * image->stride + pixMB2.pos_x; + + for( pel = 0 ; pel < MB_BLOCK_SIZE ; pel+=BLOCK_SIZE) + { + byte strength = Strength[pel]; + + switch(strength) + { + case 0: + break; + case 4: // INTRA strong + { + IntraStrongFilter_Luma_Horiz(inc_dim, SrcPtrP+pel, SrcPtrQ+pel, Alpha, Beta); + } + break; + default: + { + int C0 = ClipTab[strength] * bitdepth_scale; + FilterLuma_Horiz(inc_dim, SrcPtrP+pel, SrcPtrQ+pel, Alpha, Beta, C0, max_imgpel_value); + } + break; + } + + } + } + } + } +} diff --git a/Src/h264dec/ldecod/src/filter_luma_vert.c b/Src/h264dec/ldecod/src/filter_luma_vert.c new file mode 100644 index 00000000..6d994217 --- /dev/null +++ b/Src/h264dec/ldecod/src/filter_luma_vert.c @@ -0,0 +1,554 @@ +#include "global.h" +#include "image.h" +#include "mb_access.h" 
+#include "loopfilter.h" +#include <mmintrin.h> +#include <emmintrin.h> + +static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; +static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; +static const byte CLIP_TAB[52][5] = +{ + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1}, + { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3}, + { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6}, + { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16}, + { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25} +} ; + +static void IntraStrongFilter_Luma_Vert(int p_step, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta) +{ + + int pel; + for (pel = 0; pel < BLOCK_SIZE; pel++, SrcPtrP+=p_step, SrcPtrQ+=p_step) + { + imgpel L0 = SrcPtrP[0]; + imgpel R0 = SrcPtrQ[0]; + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = SrcPtrQ[1]; + imgpel L1 = SrcPtrP[-1]; + if ((abs( R0 - R1) < Beta) && (abs(L0 - L1) < Beta)) + { + imgpel R2 = SrcPtrQ[2]; + imgpel L2 = SrcPtrP[-2]; + + int RL0 = L0 + R0; + int small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2)); + int aq = ( abs( R0 - R2) < Beta ) & small_gap; + int ap = ( 
abs( L0 - L2) < Beta ) & small_gap; + + if (ap) + { + int L1RL0 = L1 + RL0; + imgpel L3 = SrcPtrP[-3]; + SrcPtrP[0] = (imgpel) (( R1 + ((L1RL0) << 1) + L2 + 4) >> 3); + SrcPtrP[-1] = (imgpel) (( L2 + L1RL0 + 2) >> 2); + SrcPtrP[-2] = (imgpel) ((((L3 + L2) <<1) + L2 + L1RL0 + 4) >> 3); + } + else + { + *SrcPtrP = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ; + } + + if (aq) + { + imgpel R3 = SrcPtrQ[3]; + SrcPtrQ[0] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3); + SrcPtrQ[1] = (imgpel) (( R2 + R0 + L0 + R1 + 2) >> 2); + SrcPtrQ[2] = (imgpel) ((((R3 + R2) <<1) + R2 + R1 + RL0 + 4) >> 3); + } + else + { + SrcPtrQ[0] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2); + } + } + } + } +} + +static void FilterLuma_Vert(int p_step, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta, int C0, int max_imgpel_value) +{ + int pel; + for (pel = 0; pel < BLOCK_SIZE; pel++, SrcPtrP+=p_step, SrcPtrQ+=p_step) + { + imgpel L0 = SrcPtrP[0]; + imgpel R0 = SrcPtrQ[0]; + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel R1 = SrcPtrQ[1]; + if (abs( R0 - R1) < Beta) + { + imgpel L1 = SrcPtrP[-1]; + if (abs(L0 - L1) < Beta) + { + imgpel R2 = SrcPtrQ[2]; + imgpel L2 = SrcPtrP[-2]; + + int RL0 = (L0 + R0 + 1) >> 1; + int aq = (abs(R0 - R2) < Beta); + int ap = (abs(L0 - L2) < Beta); + + //int C0 = ClipTab[ *Strength ] * bitdepth_scale; + int tc0 = (C0 + ap + aq) ; + int dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + if( ap ) + SrcPtrP[-1] += iClip3( -C0, C0, (L2 + RL0 - (L1<<1)) >> 1 ); + SrcPtrP[0] = (imgpel) iClip1(max_imgpel_value, L0 + dif); + + SrcPtrQ[0] = (imgpel) iClip1(max_imgpel_value, R0 - dif); + if( aq ) + SrcPtrQ[1] += iClip3( -C0, C0, (R2 + RL0 - (R1<<1)) >> 1 ); + } + } + } + } +} + +void EdgeLoopLumaNormal_Vert(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p) +{ + // dir == 0 + imgpel **Img = image->img; + VideoParameters *p_Vid = MbQ->p_Vid; + int xQ = edge - 1; + + PixelPos pixMB1; + 
p_Vid->getNeighbourX0(MbQ, xQ, p_Vid->mb_size[IS_LUMA], &pixMB1); + + if (pixMB1.available || (MbQ->DFDisableIdc== 0)) + { + int bitdepth_scale = pl ? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA]; + ptrdiff_t p_step = image->stride; + + Macroblock *MbP = &(p_Vid->mb_data[pixMB1.mb_addr]); + + // Average QP of the two blocks + int QP = pl? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset); + int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset); + + int Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + int Beta = BETA_TABLE [indexB] * bitdepth_scale; + + if (Alpha != 0 && Beta !=0) + { + PixelPos pixMB2; + const byte *ClipTab = CLIP_TAB [indexA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + int pel; + imgpel *SrcPtrQ; + imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x; + + p_Vid->getNeighbourX0(MbQ, ++xQ, p_Vid->mb_size[IS_LUMA], &pixMB2); + SrcPtrQ = image->base_address + pixMB2.pos_y * image->stride + pixMB2.pos_x; + + for( pel = 0 ; pel < MB_BLOCK_SIZE ; pel+=BLOCK_SIZE) + { + byte strength = Strength[pel]; + + switch(strength) + { + case 0: + break; + case 4: // INTRA strong + { + IntraStrongFilter_Luma_Vert(p_step, SrcPtrP, SrcPtrQ, Alpha, Beta); + } + break; + default: + { + int C0 = ClipTab[strength] * bitdepth_scale; + FilterLuma_Vert(p_step, SrcPtrP, SrcPtrQ, Alpha, Beta, C0, max_imgpel_value); + } + break; + } + SrcPtrP += p_step * BLOCK_SIZE; + SrcPtrQ += p_step * BLOCK_SIZE; + } + } + } +} + + +static void FilterLuma_Vert_sse2(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const byte *ClipTab) +{ + + __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1); + __m64 mmx_zero = _mm_setzero_si64(), mmx_one, mmx_four=_mm_set1_pi16(4); + __m64 mmx_minus_one; + __m64 mmx_absdiff, mmx_diff; + __m64 mmx_L0, mmx_L1, mmx_L2, mmx_L0_R0; + __m64 
mmx_R0, mmx_R1_R2, mmx_R1, mmx_R2; + __m64 mmx_load0, mmx_load1, mmx_load2, mmx_load3, mmx_load4, mmx_load5, mmx_load6, mmx_load7, mmx_load8; + __m64 mmx_ap, mmx_aq, mmx_C0, mmx_negative_C0, mmx_tc0, mmx_dif, mmx_acc, mmx_match; + int match; + int i=0; + + mmx_minus_one = _mm_set1_pi32(-1); + mmx_one = _mm_sub_pi16(mmx_zero, mmx_minus_one); // dunno if this'll be faster than _mm_set1_pi16 or not + SrcPtrP -= 2; + + STAGE: + + while (!Strength[i]) + { + SrcPtrP += p_step << 2; + if (i++ == 3) // last stage + return; + } + + mmx_load0 = (*(__m64 *)(SrcPtrP)); // La2 La1 La0 Ra0 Ra1 Ra2 --- --- + mmx_load1 = (*(__m64 *)(SrcPtrP+=p_step)); // Lb2 Lb1 Lb0 Rb0 Rb1 Rb2 --- --- + mmx_load4 = _mm_unpacklo_pi8(mmx_load0, mmx_load1); // La2 Lb2 La1 Lb1 La0 Lb0 Ra0 Rb0 * + mmx_load2 = (*(__m64 *)(SrcPtrP+=p_step)); // Lc2 Lc1 Lc0 Rc0 Rc1 Rc2 --- --- + mmx_load3 = (*(__m64 *)(SrcPtrP+=p_step)); // Ld2 Ld1 Ld0 Rd0 Rd1 Rd2 --- --- + SrcPtrP+=p_step; + mmx_load5 = _mm_unpacklo_pi8(mmx_load2, mmx_load3); // Lc2 Ld2 Lc1 Ld1 Lc0 Ld0 Rc0 Rd0 * + mmx_L0_R0 = _mm_unpackhi_pi16(mmx_load4, mmx_load5); // La0 Lb0 Lc0 Ld0 Ra0 Rb0 Rc0 Rd0 + + // abs( R0 - L0 ) < Alpha + // MMX doesn't have unsigned compare, so we have to go to short + mmx_L0 = _mm_unpacklo_pi8(mmx_L0_R0, mmx_zero); // La0 Lb0 Lc0 Ld0 + mmx_R0 = _mm_unpackhi_pi8(mmx_L0_R0, mmx_zero); // Ra0 Rb0 Rc0 Rd0 + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0); + mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_match = _mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do + mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 3) // last stage + return; + + goto STAGE; // start the process over from next position + } + + mmx_load6 = _mm_unpackhi_pi8(mmx_load0, mmx_load1); // Ra1 Rb1 Ra2 Rb2 --- --- --- --- * + mmx_load7 = _mm_unpackhi_pi8(mmx_load2, mmx_load3); // Rc1 Rd1 Rc2 Rd2 --- 
--- --- --- * + mmx_R1_R2 = _mm_unpacklo_pi16(mmx_load6, mmx_load7); // Ra1 Rb1 Rc1 Rd1 Ra2 Rb2 Rc2 Rd2 + + // abs( R0 - R1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_R0 already populated + mmx_R1 = _mm_unpacklo_pi8(mmx_R1_R2, mmx_zero); // Ra1 Rb1 Rc1 Rd1 + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1); + mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 3) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // abs(L0 - L1) < Beta + // MMX doesn't have unsigned compare, so we have to go to short + // mmx_L0 already populated + // mmx_load4: La2 Lb2 La1 Lb1 La0 Lb0 --- --- + // mmx_load5: Lc2 Ld2 Lc1 Ld1 Lc0 Ld0 --- --- + mmx_load4 = _mm_unpacklo_pi16(mmx_load4, mmx_load5); // La2 Lb2 Lc2 Ld2 La1 Lb1 Lc1 Ld1 + mmx_L1 = _mm_unpackhi_pi8(mmx_load4, mmx_zero); // La1 Lb1 Lc1 Ld1 + mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1); + mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one); + mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); + match = _mm_movemask_pi8(mmx_match); + if (match == 0) + { + if (i++ == 3) // last stage + return; + + goto STAGE; // start the process over from next position + } + + // ok, now time to performn the actual calculation. hope it was worth it!! 
+ + // ap = (abs(L0 - L2) < Beta); + // finish loading L2 + mmx_L2 = _mm_unpacklo_pi8(mmx_load4, mmx_zero); // La1 Lb1 Lc1 Ld1 + mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L2); + mmx_absdiff =_mm_subs_pu16(mmx_L2, mmx_L0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_ap = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_tc0 = _mm_add_pi16(mmx_ap, mmx_one); // a clever trick. add one to essential do !mmx_absdiff (since mmx_diff will == 0xFFFF when true) + + // aq = (abs(R0 - R2) < Beta); + // finish loading R2 + // mmx_R1_R2: Ra1 Rb1 Rc1 Rd1 Ra2 Rb2 Rc2 Rd2 + mmx_R2 = _mm_unpackhi_pi8(mmx_R1_R2, mmx_zero); // Ra2 Rb2 Rc2 Rd2 + mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R2); + mmx_absdiff =_mm_subs_pu16(mmx_R2, mmx_R0); + mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); + mmx_aq = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); + mmx_tc0 = _mm_add_pi16(mmx_tc0, _mm_add_pi16(mmx_aq, mmx_one)); // a clever trick. add one to essential do !mmx_absdiff (since mmx_diff will == 0xFFFF when true) + + // tc0 = (C0 + ap + aq) ; + mmx_C0 = _mm_set1_pi16(ClipTab[Strength[i]]); + mmx_negative_C0 = _mm_sub_pi16(mmx_zero, mmx_C0); + mmx_tc0 = _mm_add_pi16(mmx_tc0, mmx_C0); + + + // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + mmx_dif = mmx_R0; + mmx_dif = _mm_sub_pi16(mmx_dif, mmx_L0); + mmx_dif = _mm_slli_pi16(mmx_dif, 2); + mmx_dif = _mm_add_pi16(mmx_dif, mmx_L1); + mmx_dif = _mm_sub_pi16(mmx_dif, mmx_R1); + mmx_dif = _mm_add_pi16(mmx_dif, mmx_four); + mmx_dif = _mm_srai_pi16(mmx_dif, 3); + mmx_dif = _mm_min_pi16(mmx_dif, mmx_tc0); + mmx_tc0 = _mm_sub_pi16(mmx_zero, mmx_tc0); + mmx_dif = _mm_max_pi16(mmx_dif, mmx_tc0); + mmx_dif = _mm_and_si64(mmx_dif, mmx_match); + + // TODO: benski> is it worth checking for_mm_movemask_pi8(ap) to see if we can skip this? 
+ // if( ap ) L1 += iClip3( -C0, C0, (L2 + ((L0 + R0 + 1) >> 1) - (L1<<1)) >> 1 ); + mmx_acc = mmx_L0; + mmx_acc = _mm_add_pi16(mmx_acc, mmx_R0); + mmx_acc = _mm_add_pi16(mmx_acc, mmx_one); + mmx_acc = _mm_srai_pi16(mmx_acc, 1); + mmx_acc = _mm_sub_pi16(mmx_acc, mmx_L1); + mmx_acc = _mm_sub_pi16(mmx_acc, mmx_L1); + mmx_acc = _mm_add_pi16(mmx_acc, mmx_L2); + mmx_acc = _mm_srai_pi16(mmx_acc, 1); + mmx_acc = _mm_min_pi16(mmx_acc, mmx_C0); + mmx_acc = _mm_max_pi16(mmx_acc, mmx_negative_C0); + mmx_acc = _mm_andnot_si64(mmx_ap, mmx_acc); + mmx_acc = _mm_and_si64(mmx_acc, mmx_match); + mmx_L1 = _mm_add_pi16(mmx_L1, mmx_acc); + + + //if( aq ) R1 += iClip3( -C0, C0, (R2 + RL0 - (R1<<1)) >> 1 ); + mmx_acc = mmx_L0; + mmx_acc = _mm_add_pi16(mmx_acc, mmx_R0); + mmx_acc = _mm_add_pi16(mmx_acc, mmx_one); + mmx_acc = _mm_srai_pi16(mmx_acc, 1); + mmx_acc = _mm_sub_pi16(mmx_acc, mmx_R1); + mmx_acc = _mm_sub_pi16(mmx_acc, mmx_R1); + mmx_acc = _mm_add_pi16(mmx_acc, mmx_R2); + mmx_acc = _mm_srai_pi16(mmx_acc, 1); + mmx_acc = _mm_min_pi16(mmx_acc, mmx_C0); + mmx_acc = _mm_max_pi16(mmx_acc, mmx_negative_C0); + mmx_acc = _mm_andnot_si64(mmx_aq, mmx_acc); + mmx_acc = _mm_and_si64(mmx_acc, mmx_match); + mmx_R1 = _mm_add_pi16(mmx_R1, mmx_acc); + + // L0 = (imgpel) iClip1(max_imgpel_value, L0 + dif); + mmx_L0 = _mm_add_pi16(mmx_L0, mmx_dif); + + // R0 = (imgpel) iClip1(max_imgpel_value, R0 - dif); + mmx_R0 = _mm_sub_pi16(mmx_R0, mmx_dif); + + + // now for the super-exciting fun of getting this data back into memory + SrcPtrP -= 4*p_step; + //SrcPtrQ -= 4*p_step; + SrcPtrP++; + + // rotate 4x4 matrix + mmx_load1 = _mm_unpacklo_pi16(mmx_L1, mmx_R0); // 00 20 01 21 + mmx_load2 = _mm_unpacklo_pi16(mmx_L0, mmx_R1); // 10 30 11 31 + mmx_load3 = _mm_unpackhi_pi16(mmx_L1, mmx_R0); // 02 22 03 23 + mmx_load4 = _mm_unpackhi_pi16(mmx_L0, mmx_R1); // 12 32 13 33 + mmx_load5 = _mm_unpacklo_pi16(mmx_load1, mmx_load2); // 00 10 20 30 + mmx_load6 = _mm_unpackhi_pi16(mmx_load1, mmx_load2); // 01 11 21 31 + 
mmx_load7 = _mm_unpacklo_pi16(mmx_load3, mmx_load4); // 02 12 22 32 + mmx_load8 = _mm_unpackhi_pi16(mmx_load3, mmx_load4); // 03 13 23 33 + mmx_load5 = _mm_packs_pu16(mmx_load5, mmx_load5); + mmx_load6 = _mm_packs_pu16(mmx_load6, mmx_load6); + mmx_load7 = _mm_packs_pu16(mmx_load7, mmx_load7); + mmx_load8 = _mm_packs_pu16(mmx_load8, mmx_load8); + + //mmx_load1 = _mm_setr_pi16(0x8080, 0x80, 0, 0); + *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_load5); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load6); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load7); + *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load8); + + if (i++ == 3) + return; + + //SrcPtrQ += 2; + SrcPtrP += p_step; + //SrcPtrQ += p_step; + SrcPtrP--; + goto STAGE; // next stage +} + +/* assumptions: YUV 420, getNonAffNeighbour */ +void EdgeLoopLuma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, PixelPos pixMB1, Macroblock *MbP) +{ + // dir == 0 + if (MbQ->DFDisableIdc== 0) + { + ptrdiff_t p_step = image->stride; + + // Average QP of the two blocks + int QP = (MbP->qp + MbQ->qp + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset); + int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset); + + int Alpha = ALPHA_TABLE[indexA]; + int Beta = BETA_TABLE [indexB]; + + if (Alpha != 0 && Beta !=0) + { + imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x; + + if (Strength[0] == 4) // if strong filter is used, all blocks will be strong + { + imgpel *SrcPtrQ = SrcPtrP+1; + int pel; + for( pel = 0 ; pel < BLOCK_SIZE ; pel++) + { + IntraStrongFilter_Luma_Vert(p_step, SrcPtrP, SrcPtrQ, Alpha, Beta); + SrcPtrP += p_step * BLOCK_SIZE; + SrcPtrQ += p_step * BLOCK_SIZE; + } + } + else + { + const byte *ClipTab = CLIP_TAB [indexA]; + FilterLuma_Vert_sse2(p_step, SrcPtrP, Alpha, Beta, Strength, ClipTab); + } + } + } +} + +void EdgeLoopLumaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, 
StorablePicture *p) +{ + // dir == 0 + imgpel **Img = image->img; + int width = image->stride; + int pel, ap = 0, aq = 0, Strng ; + + int C0, tc0, dif; + imgpel L0, R0; + int Alpha = 0, Beta = 0 ; + const byte* ClipTab = NULL; + int small_gap; + int indexA, indexB; + + int QP; + int xQ, yQ; + + PixelPos pixP, pixQ; + VideoParameters *p_Vid = MbQ->p_Vid; + int bitdepth_scale = p_Vid->bitdepth_scale[IS_LUMA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[PLANE_Y]; + + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + + Macroblock *MbP; + imgpel *SrcPtrP, *SrcPtrQ; + + for( pel = 0 ; pel < MB_BLOCK_SIZE ; ++pel ) + { + xQ = edge; + yQ = pel; + getAffNeighbourXPLuma(MbQ, xQ - 1, yQ, &pixP); + + if (pixP.available || (MbQ->DFDisableIdc== 0)) + { + if( (Strng = Strength[pel]) != 0) + { + getAffNeighbourXPLuma(MbQ, xQ, yQ, &pixQ); // TODO: PP + + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + + SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + + // Average QP of the two blocks + QP = (MbP->qp + MbQ->qp + 1) >> 1; + + indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + Beta = BETA_TABLE [indexB] * bitdepth_scale; + ClipTab = CLIP_TAB[indexA]; + + L0 = SrcPtrP[0] ; + R0 = SrcPtrQ[0] ; + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel L1 = SrcPtrP[-1]; + imgpel R1 = SrcPtrQ[ 1]; + if ((abs( R0 - R1) < Beta ) && (abs(L0 - L1) < Beta )) + { + imgpel L2 = SrcPtrP[-2]; + imgpel R2 = SrcPtrQ[ 2]; + if(Strng == 4 ) // INTRA strong filtering + { + int RL0 = L0 + R0; + small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2)); + aq = ( abs( R0 - R2) < Beta ) & small_gap; + ap = ( abs( L0 - L2) < Beta ) & small_gap; + + if (ap) + { + imgpel L3 = SrcPtrP[-3]; + SrcPtrP[-2] = (imgpel) ((((L3 + L2) << 1) + L2 + L1 + RL0 + 4) >> 3); + SrcPtrP[-1 ] = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2); + SrcPtrP[ 0 ] = (imgpel) (( R1 + ((L1 + RL0) << 
1) + L2 + 4) >> 3); + } + else + { + SrcPtrP[ 0 ] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ; + } + + if (aq) + { + imgpel R3 = SrcPtrQ[ 3]; + SrcPtrQ[ 0 ] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3); + SrcPtrQ[ 1 ] = (imgpel) (( R2 + R0 + R1 + L0 + 2) >> 2); + SrcPtrQ[ 2 ] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + RL0 + 4) >> 3); + } + else + { + SrcPtrQ[ 0 ] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2); + } + } + else // normal filtering + { + int RL0 = (L0 + R0 + 1) >> 1; + aq = (abs( R0 - R2) < Beta); + ap = (abs( L0 - L2) < Beta); + + C0 = ClipTab[ Strng ] * bitdepth_scale; + tc0 = (C0 + ap + aq) ; + dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3) ; + + if( ap ) + *(SrcPtrP - 1) += iClip3( -C0, C0, ( L2 + RL0 - (L1 << 1)) >> 1 ) ; + + *SrcPtrP = (imgpel) iClip1 (max_imgpel_value, L0 + dif) ; + *SrcPtrQ = (imgpel) iClip1 (max_imgpel_value, R0 - dif) ; + + if( aq ) + *(SrcPtrQ + 1) += iClip3( -C0, C0, ( R2 + RL0 - (R1 << 1)) >> 1 ) ; + } + } + } + } + } + } +} diff --git a/Src/h264dec/ldecod/src/fmo.c b/Src/h264dec/ldecod/src/fmo.c new file mode 100644 index 00000000..cb32230e --- /dev/null +++ b/Src/h264dec/ldecod/src/fmo.c @@ -0,0 +1,552 @@ + +/*! 
+ ***************************************************************************** + * + * \file fmo.c + * + * \brief + * Support for Flexible Macroblock Ordering (FMO) + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger stewe@cs.tu-berlin.de + * - Karsten Suehring suehring@hhi.de + ****************************************************************************** + */ + +#include "global.h" +#include "elements.h" +#include "defines.h" +#include "header.h" +#include "fmo.h" + +//#define PRINT_FMO_MAPS + +static void FmoGenerateType0MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); +static void FmoGenerateType1MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); +static void FmoGenerateType2MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); +static void FmoGenerateType3MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); +static void FmoGenerateType4MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); +static void FmoGenerateType5MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); +static void FmoGenerateType6MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); + + +/*! 
+ ************************************************************************ + * \brief + * Generates p_Vid->MapUnitToSliceGroupMap + * Has to be called every time a new Picture Parameter Set is used + * + * \param p_Vid + * image encoding parameters for current picture + * + ************************************************************************ + */ +static int FmoGenerateMapUnitToSliceGroupMap (VideoParameters *p_Vid) +{ + seq_parameter_set_rbsp_t* sps = p_Vid->active_sps; + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + + unsigned int NumSliceGroupMapUnits; + + NumSliceGroupMapUnits = (sps->pic_height_in_map_units_minus1+1)* (sps->pic_width_in_mbs_minus1+1); + + if (pps->slice_group_map_type == 6) + { + if ((pps->pic_size_in_map_units_minus1 + 1) != NumSliceGroupMapUnits) + { + error ("wrong pps->pic_size_in_map_units_minus1 for used SPS and FMO type 6", 500); + } + } + + // allocate memory for p_Vid->MapUnitToSliceGroupMap + if (p_Vid->MapUnitToSliceGroupMap) + free (p_Vid->MapUnitToSliceGroupMap); + if ((p_Vid->MapUnitToSliceGroupMap = malloc ((NumSliceGroupMapUnits) * sizeof (int))) == NULL) + { + printf ("cannot allocated %d bytes for p_Vid->MapUnitToSliceGroupMap, exit\n", (int) ( (pps->pic_size_in_map_units_minus1+1) * sizeof (int))); + exit (-1); + } + + if (pps->num_slice_groups_minus1 == 0) // only one slice group + { + memset (p_Vid->MapUnitToSliceGroupMap, 0, NumSliceGroupMapUnits * sizeof (int)); + return 0; + } + + switch (pps->slice_group_map_type) + { + case 0: + FmoGenerateType0MapUnitMap (p_Vid, NumSliceGroupMapUnits); + break; + case 1: + FmoGenerateType1MapUnitMap (p_Vid, NumSliceGroupMapUnits); + break; + case 2: + FmoGenerateType2MapUnitMap (p_Vid, NumSliceGroupMapUnits); + break; + case 3: + FmoGenerateType3MapUnitMap (p_Vid, NumSliceGroupMapUnits); + break; + case 4: + FmoGenerateType4MapUnitMap (p_Vid, NumSliceGroupMapUnits); + break; + case 5: + FmoGenerateType5MapUnitMap (p_Vid, NumSliceGroupMapUnits); + break; + case 6: + 
FmoGenerateType6MapUnitMap (p_Vid, NumSliceGroupMapUnits); + break; + default: + printf ("Illegal slice_group_map_type %d , exit \n", (int) pps->slice_group_map_type); + exit (-1); + } + return 0; +} + + +/*! + ************************************************************************ + * \brief + * Generates p_Vid->MbToSliceGroupMap from p_Vid->MapUnitToSliceGroupMap + * + * \param p_Vid + * image encoding parameters for current picture + * + ************************************************************************ + */ +static int FmoGenerateMbToSliceGroupMap (VideoParameters *p_Vid) +{ + seq_parameter_set_rbsp_t* sps = p_Vid->active_sps; + + unsigned i; + + // allocate memory for p_Vid->MbToSliceGroupMap + if (p_Vid->MbToSliceGroupMap) + free (p_Vid->MbToSliceGroupMap); + + if ((p_Vid->MbToSliceGroupMap = malloc ((p_Vid->PicSizeInMbs) * sizeof (int))) == NULL) + { + printf ("cannot allocate %d bytes for p_Vid->MbToSliceGroupMap, exit\n", (int) ((p_Vid->PicSizeInMbs) * sizeof (int))); + exit (-1); + } + + + if ((sps->frame_mbs_only_flag)|| p_Vid->field_pic_flag) + { + for (i=0; i<p_Vid->PicSizeInMbs; i++) + { + p_Vid->MbToSliceGroupMap[i] = p_Vid->MapUnitToSliceGroupMap[i]; + } + } + else + if (sps->mb_adaptive_frame_field_flag && (!p_Vid->field_pic_flag)) + { + for (i=0; i<p_Vid->PicSizeInMbs; i++) + { + p_Vid->MbToSliceGroupMap[i] = p_Vid->MapUnitToSliceGroupMap[i/2]; + } + } + else + { + for (i=0; i<p_Vid->PicSizeInMbs; i++) + { + p_Vid->MbToSliceGroupMap[i] = p_Vid->MapUnitToSliceGroupMap[(i/(2*p_Vid->PicWidthInMbs))*p_Vid->PicWidthInMbs+(i%p_Vid->PicWidthInMbs)]; + } + } + return 0; +} + + +/*! + ************************************************************************ + * \brief + * FMO initialization: Generates p_Vid->MapUnitToSliceGroupMap and p_Vid->MbToSliceGroupMap. 
+ * + * \param p_Vid + * image encoding parameters for current picture + ************************************************************************ + */ +int fmo_init(VideoParameters *p_Vid) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + +#ifdef PRINT_FMO_MAPS + unsigned i,j; +#endif + + FmoGenerateMapUnitToSliceGroupMap(p_Vid); + FmoGenerateMbToSliceGroupMap(p_Vid); + + p_Vid->NumberOfSliceGroups = pps->num_slice_groups_minus1 + 1; + +#ifdef PRINT_FMO_MAPS + printf("\n"); + printf("FMO Map (Units):\n"); + + for (j=0; j<p_Vid->PicHeightInMapUnits; j++) + { + for (i=0; i<p_Vid->PicWidthInMbs; i++) + { + printf("%c",48+p_Vid->MapUnitToSliceGroupMap[i+j*p_Vid->PicWidthInMbs]); + } + printf("\n"); + } + printf("\n"); + printf("FMO Map (Mb):\n"); + + for (j=0; j<p_Vid->PicHeightInMbs; j++) + { + for (i=0; i<p_Vid->PicWidthInMbs; i++) + { + printf("%c",48 + p_Vid->MbToSliceGroupMap[i + j * p_Vid->PicWidthInMbs]); + } + printf("\n"); + } + printf("\n"); + +#endif + + return 0; +} + + +/*! + ************************************************************************ + * \brief + * Free memory allocated by FMO functions + ************************************************************************ + */ +int FmoFinit(VideoParameters *p_Vid) +{ + if (p_Vid->MbToSliceGroupMap) + { + free (p_Vid->MbToSliceGroupMap); + p_Vid->MbToSliceGroupMap = NULL; + } + if (p_Vid->MapUnitToSliceGroupMap) + { + free (p_Vid->MapUnitToSliceGroupMap); + p_Vid->MapUnitToSliceGroupMap = NULL; + } + return 0; +} + + +/*! + ************************************************************************ + * \brief + * FmoGetNumberOfSliceGroup(p_Vid) + * + * \par p_Vid: + * VideoParameters + ************************************************************************ + */ +int FmoGetNumberOfSliceGroup(VideoParameters *p_Vid) +{ + return p_Vid->NumberOfSliceGroups; +} + + +/*! 
+ ************************************************************************ + * \brief + * FmoGetLastMBOfPicture(p_Vid) + * returns the macroblock number of the last MB in a picture. This + * mb happens to be the last macroblock of the picture if there is only + * one slice group + * + * \par Input: + * None + ************************************************************************ + */ +int FmoGetLastMBOfPicture(VideoParameters *p_Vid) +{ + return FmoGetLastMBInSliceGroup (p_Vid, FmoGetNumberOfSliceGroup(p_Vid)-1); +} + + +/*! + ************************************************************************ + * \brief + * FmoGetLastMBInSliceGroup: Returns MB number of last MB in SG + * + * \par Input: + * SliceGroupID (0 to 7) + ************************************************************************ + */ + +int FmoGetLastMBInSliceGroup (VideoParameters *p_Vid, int SliceGroup) +{ + int i; + + for (i=p_Vid->PicSizeInMbs-1; i>=0; i--) + if (FmoGetSliceGroupId (p_Vid, i) == SliceGroup) + return i; + return -1; + +} + + +/*! + ************************************************************************ + * \brief + * Returns SliceGroupID for a given MB + * + * \param p_Vid + * image encoding parameters for current picture + * \param mb + * Macroblock number (in scan order) + ************************************************************************ + */ +int FmoGetSliceGroupId (VideoParameters *p_Vid, int mb) +{ + assert (mb < (int) p_Vid->PicSizeInMbs); + assert (p_Vid->MbToSliceGroupMap != NULL); + return p_Vid->MbToSliceGroupMap[mb]; +} + + +/*! 
+ ************************************************************************ + * \brief + * FmoGetNextMBBr: Returns the MB-Nr (in scan order) of the next + * MB in the (scattered) Slice, -1 if the slice is finished + * \param p_Vid + * image encoding parameters for current picture + * + * \param CurrentMbNr + * number of the current macroblock + ************************************************************************ + */ +int FmoGetNextMBNr (VideoParameters *p_Vid, int CurrentMbNr) +{ + int SliceGroup = FmoGetSliceGroupId (p_Vid, CurrentMbNr); + + while (++CurrentMbNr<(int)p_Vid->PicSizeInMbs && p_Vid->MbToSliceGroupMap [CurrentMbNr] != SliceGroup) + ; + + if (CurrentMbNr >= (int)p_Vid->PicSizeInMbs) + return -1; // No further MB in this slice (could be end of picture) + else + return CurrentMbNr; +} + + +/*! + ************************************************************************ + * \brief + * Generate interleaved slice group map type MapUnit map (type 0) + * + ************************************************************************ + */ +static void FmoGenerateType0MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + unsigned iGroup, j; + unsigned i = 0; + do + { + for( iGroup = 0; + (iGroup <= pps->num_slice_groups_minus1) && (i < PicSizeInMapUnits); + i += pps->run_length_minus1[iGroup++] + 1 ) + { + for( j = 0; j <= pps->run_length_minus1[ iGroup ] && i + j < PicSizeInMapUnits; j++ ) + p_Vid->MapUnitToSliceGroupMap[i+j] = iGroup; + } + } + while( i < PicSizeInMapUnits ); +} + + +/*! 
+ ************************************************************************ + * \brief + * Generate dispersed slice group map type MapUnit map (type 1) + * + ************************************************************************ + */ +static void FmoGenerateType1MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + unsigned i; + for( i = 0; i < PicSizeInMapUnits; i++ ) + { + p_Vid->MapUnitToSliceGroupMap[i] = ((i%p_Vid->PicWidthInMbs)+(((i/p_Vid->PicWidthInMbs)*(pps->num_slice_groups_minus1+1))/2)) + %(pps->num_slice_groups_minus1+1); + } +} + +/*! + ************************************************************************ + * \brief + * Generate foreground with left-over slice group map type MapUnit map (type 2) + * + ************************************************************************ + */ +static void FmoGenerateType2MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + int iGroup; + unsigned i, x, y; + unsigned yTopLeft, xTopLeft, yBottomRight, xBottomRight; + + for( i = 0; i < PicSizeInMapUnits; i++ ) + p_Vid->MapUnitToSliceGroupMap[ i ] = pps->num_slice_groups_minus1; + + for( iGroup = pps->num_slice_groups_minus1 - 1 ; iGroup >= 0; iGroup-- ) + { + yTopLeft = pps->top_left[ iGroup ] / p_Vid->PicWidthInMbs; + xTopLeft = pps->top_left[ iGroup ] % p_Vid->PicWidthInMbs; + yBottomRight = pps->bottom_right[ iGroup ] / p_Vid->PicWidthInMbs; + xBottomRight = pps->bottom_right[ iGroup ] % p_Vid->PicWidthInMbs; + for( y = yTopLeft; y <= yBottomRight; y++ ) + for( x = xTopLeft; x <= xBottomRight; x++ ) + p_Vid->MapUnitToSliceGroupMap[ y * p_Vid->PicWidthInMbs + x ] = iGroup; + } +} + + +/*! 
+ ************************************************************************ + * \brief + * Generate box-out slice group map type MapUnit map (type 3) + * + ************************************************************************ + */ +static void FmoGenerateType3MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + unsigned i, k; + int leftBound, topBound, rightBound, bottomBound; + int x, y, xDir, yDir; + int mapUnitVacant; + + unsigned mapUnitsInSliceGroup0 = imin((pps->slice_group_change_rate_minus1 + 1) * p_Vid->slice_group_change_cycle, PicSizeInMapUnits); + + for( i = 0; i < PicSizeInMapUnits; i++ ) + p_Vid->MapUnitToSliceGroupMap[ i ] = 2; + + x = ( p_Vid->PicWidthInMbs - pps->slice_group_change_direction_flag ) / 2; + y = ( p_Vid->PicHeightInMapUnits - pps->slice_group_change_direction_flag ) / 2; + + leftBound = x; + topBound = y; + rightBound = x; + bottomBound = y; + + xDir = pps->slice_group_change_direction_flag - 1; + yDir = pps->slice_group_change_direction_flag; + + for( k = 0; k < PicSizeInMapUnits; k += mapUnitVacant ) + { + mapUnitVacant = ( p_Vid->MapUnitToSliceGroupMap[ y * p_Vid->PicWidthInMbs + x ] == 2 ); + if( mapUnitVacant ) + p_Vid->MapUnitToSliceGroupMap[ y * p_Vid->PicWidthInMbs + x ] = ( k >= mapUnitsInSliceGroup0 ); + + if( xDir == -1 && x == leftBound ) + { + leftBound = imax( leftBound - 1, 0 ); + x = leftBound; + xDir = 0; + yDir = 2 * pps->slice_group_change_direction_flag - 1; + } + else + if( xDir == 1 && x == rightBound ) + { + rightBound = imin( rightBound + 1, (int)p_Vid->PicWidthInMbs - 1 ); + x = rightBound; + xDir = 0; + yDir = 1 - 2 * pps->slice_group_change_direction_flag; + } + else + if( yDir == -1 && y == topBound ) + { + topBound = imax( topBound - 1, 0 ); + y = topBound; + xDir = 1 - 2 * pps->slice_group_change_direction_flag; + yDir = 0; + } + else + if( yDir == 1 && y == bottomBound ) + { + bottomBound = imin( bottomBound + 1, 
(int)p_Vid->PicHeightInMapUnits - 1 ); + y = bottomBound; + xDir = 2 * pps->slice_group_change_direction_flag - 1; + yDir = 0; + } + else + { + x = x + xDir; + y = y + yDir; + } + } + +} + +/*! + ************************************************************************ + * \brief + * Generate raster scan slice group map type MapUnit map (type 4) + * + ************************************************************************ + */ +static void FmoGenerateType4MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + + unsigned mapUnitsInSliceGroup0 = imin((pps->slice_group_change_rate_minus1 + 1) * p_Vid->slice_group_change_cycle, PicSizeInMapUnits); + unsigned sizeOfUpperLeftGroup = pps->slice_group_change_direction_flag ? ( PicSizeInMapUnits - mapUnitsInSliceGroup0 ) : mapUnitsInSliceGroup0; + + unsigned i; + + for( i = 0; i < PicSizeInMapUnits; i++ ) + if( i < sizeOfUpperLeftGroup ) + p_Vid->MapUnitToSliceGroupMap[ i ] = pps->slice_group_change_direction_flag; + else + p_Vid->MapUnitToSliceGroupMap[ i ] = 1 - pps->slice_group_change_direction_flag; + +} + +/*! + ************************************************************************ + * \brief + * Generate wipe slice group map type MapUnit map (type 5) + * + ************************************************************************ + */ +static void FmoGenerateType5MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + + unsigned mapUnitsInSliceGroup0 = imin((pps->slice_group_change_rate_minus1 + 1) * p_Vid->slice_group_change_cycle, PicSizeInMapUnits); + unsigned sizeOfUpperLeftGroup = pps->slice_group_change_direction_flag ? 
( PicSizeInMapUnits - mapUnitsInSliceGroup0 ) : mapUnitsInSliceGroup0; + + unsigned i,j, k = 0; + + for( j = 0; j < p_Vid->PicWidthInMbs; j++ ) + for( i = 0; i < p_Vid->PicHeightInMapUnits; i++ ) + if( k++ < sizeOfUpperLeftGroup ) + p_Vid->MapUnitToSliceGroupMap[ i * p_Vid->PicWidthInMbs + j ] = pps->slice_group_change_direction_flag; + else + p_Vid->MapUnitToSliceGroupMap[ i * p_Vid->PicWidthInMbs + j ] = 1 - pps->slice_group_change_direction_flag; + +} + +/*! + ************************************************************************ + * \brief + * Generate explicit slice group map type MapUnit map (type 6) + * + ************************************************************************ + */ +static void FmoGenerateType6MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ) +{ + pic_parameter_set_rbsp_t* pps = p_Vid->active_pps; + unsigned i; + for (i=0; i<PicSizeInMapUnits; i++) + { + p_Vid->MapUnitToSliceGroupMap[i] = pps->slice_group_id[i]; + } +} + diff --git a/Src/h264dec/ldecod/src/header.c b/Src/h264dec/ldecod/src/header.c new file mode 100644 index 00000000..1823d82b --- /dev/null +++ b/Src/h264dec/ldecod/src/header.c @@ -0,0 +1,857 @@ + +/*! + ************************************************************************************* + * \file header.c + * + * \brief + * H.264 Slice headers + * + ************************************************************************************* + */ + +#include "global.h" +#include "elements.h" +#include "defines.h" +#include "fmo.h" +#include "vlc.h" +#include "mbuffer.h" +#include "header.h" + +#include "ctx_tables.h" + + +#if TRACE +#define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE) +#else +#define SYMTRACESTRING(s) // do nothing +#endif + +static void ref_pic_list_reordering(Slice *currSlice); +static void pred_weight_table(Slice *currSlice); + + +/*! 
+ ************************************************************************ + * \brief + * calculate Ceil(Log2(uiVal)) + ************************************************************************ + */ +unsigned CeilLog2( unsigned uiVal) +{ + unsigned uiTmp = uiVal-1; + unsigned uiRet = 0; + + while( uiTmp != 0 ) + { + uiTmp >>= 1; + uiRet++; + } + return uiRet; +} + +unsigned CeilLog2_sf( unsigned uiVal) +{ + unsigned uiTmp = uiVal-1; + unsigned uiRet = 0; + + while( uiTmp > 0 ) + { + uiTmp >>= 1; + uiRet++; + } + return uiRet; +} + +/*! + ************************************************************************ + * \brief + * read the first part of the header (only the pic_parameter_set_id) + * \return + * Length of the first part of the slice header (in bits) + ************************************************************************ + */ +void FirstPartOfSliceHeader(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + byte dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + int tmp; + + // Get first_mb_in_slice + currSlice->start_mb_nr = ue_v ("SH: first_mb_in_slice", currStream); + + tmp = ue_v ("SH: slice_type", currStream); + + if (tmp > 4) tmp -= 5; + + p_Vid->type = currSlice->slice_type = (SliceType) tmp; + + currSlice->pic_parameter_set_id = ue_v ("SH: pic_parameter_set_id", currStream); + + if( p_Vid->separate_colour_plane_flag ) + p_Vid->colour_plane_id = u_v (2, "SH: colour_plane_id", currStream); + else + p_Vid->colour_plane_id = PLANE_Y; +} + +/*! 
+ ************************************************************************ + * \brief + * read the scond part of the header (without the pic_parameter_set_id + * \return + * Length of the second part of the Slice header in bits + ************************************************************************ + */ +void RestOfSliceHeader(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + InputParameters *p_Inp = currSlice->p_Inp; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + byte dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + + int val, len; + + p_Vid->frame_num = u_v (active_sps->log2_max_frame_num_minus4 + 4, "SH: frame_num", currStream); + + /* Tian Dong: frame_num gap processing, if found */ + if (p_Vid->idr_flag) + { + p_Vid->pre_frame_num = p_Vid->frame_num; + // picture error concealment + p_Vid->last_ref_pic_poc = 0; + assert(p_Vid->frame_num == 0); + } + + if (active_sps->frame_mbs_only_flag) + { + p_Vid->structure = FRAME; + p_Vid->field_pic_flag=0; + } + else + { + // field_pic_flag u(1) + p_Vid->field_pic_flag = u_1("SH: field_pic_flag", currStream); + if (p_Vid->field_pic_flag) + { + // bottom_field_flag u(1) + p_Vid->bottom_field_flag = (byte)u_1("SH: bottom_field_flag", currStream); + p_Vid->structure = p_Vid->bottom_field_flag ? 
BOTTOM_FIELD : TOP_FIELD; + } + else + { + p_Vid->structure = FRAME; + p_Vid->bottom_field_flag = FALSE; + } + } + + currSlice->structure = (PictureStructure) p_Vid->structure; + + p_Vid->mb_aff_frame_flag=(active_sps->mb_adaptive_frame_field_flag && (p_Vid->field_pic_flag==0)); + currSlice->mb_aff_frame_flag = p_Vid->mb_aff_frame_flag; + + if (p_Vid->structure == FRAME ) + assert (p_Vid->field_pic_flag == 0); + if (p_Vid->structure == TOP_FIELD ) + assert (p_Vid->field_pic_flag == 1 && (p_Vid->bottom_field_flag == FALSE)); + if (p_Vid->structure == BOTTOM_FIELD) + assert (p_Vid->field_pic_flag == 1 && (p_Vid->bottom_field_flag == TRUE )); + + if (p_Vid->idr_flag) + { + p_Vid->idr_pic_id = ue_v("SH: idr_pic_id", currStream); + } + + if (active_sps->pic_order_cnt_type == 0) + { + p_Vid->pic_order_cnt_lsb = u_v(active_sps->log2_max_pic_order_cnt_lsb_minus4 + 4, "SH: pic_order_cnt_lsb", currStream); + if( p_Vid->active_pps->bottom_field_pic_order_in_frame_present_flag == 1 && !p_Vid->field_pic_flag ) + p_Vid->delta_pic_order_cnt_bottom = se_v("SH: delta_pic_order_cnt_bottom", currStream); + else + p_Vid->delta_pic_order_cnt_bottom = 0; + } + if( active_sps->pic_order_cnt_type == 1 && !active_sps->delta_pic_order_always_zero_flag ) + { + p_Vid->delta_pic_order_cnt[ 0 ] = se_v("SH: delta_pic_order_cnt[0]", currStream); + if( p_Vid->active_pps->bottom_field_pic_order_in_frame_present_flag == 1 && !p_Vid->field_pic_flag ) + p_Vid->delta_pic_order_cnt[ 1 ] = se_v("SH: delta_pic_order_cnt[1]", currStream); + }else + { + if (active_sps->pic_order_cnt_type == 1) + { + p_Vid->delta_pic_order_cnt[ 0 ] = 0; + p_Vid->delta_pic_order_cnt[ 1 ] = 0; + } + } + + //! 
redundant_pic_cnt is missing here + if (p_Vid->active_pps->redundant_pic_cnt_present_flag) + { + p_Vid->redundant_pic_cnt = ue_v ("SH: redundant_pic_cnt", currStream); + } + + if(currSlice->slice_type == B_SLICE) + { + currSlice->direct_spatial_mv_pred_flag = u_1 ("SH: direct_spatial_mv_pred_flag", currStream); + } + + currSlice->num_ref_idx_l0_active = p_Vid->active_pps->num_ref_idx_l0_active_minus1 + 1; + currSlice->num_ref_idx_l1_active = p_Vid->active_pps->num_ref_idx_l1_active_minus1 + 1; + + if(p_Vid->type==P_SLICE || p_Vid->type == SP_SLICE || p_Vid->type==B_SLICE) + { + val = u_1 ("SH: num_ref_idx_override_flag", currStream); + if (val) + { + currSlice->num_ref_idx_l0_active = 1 + ue_v ("SH: num_ref_idx_l0_active_minus1", currStream); + + if(p_Vid->type==B_SLICE) + { + currSlice->num_ref_idx_l1_active = 1 + ue_v ("SH: num_ref_idx_l1_active_minus1", currStream); + } + } + } + if (currSlice->slice_type!=B_SLICE) + { + currSlice->num_ref_idx_l1_active = 0; + } + + ref_pic_list_reordering(currSlice); + + currSlice->apply_weights = ((p_Vid->active_pps->weighted_pred_flag && (currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE) ) + || ((p_Vid->active_pps->weighted_bipred_idc > 0 ) && (currSlice->slice_type == B_SLICE))); + + if ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) + { + pred_weight_table(currSlice); + } + + if (p_Vid->nal_reference_idc) + dec_ref_pic_marking(p_Vid, currStream); + + if (p_Vid->active_pps->entropy_coding_mode_flag && p_Vid->type!=I_SLICE && p_Vid->type!=SI_SLICE) + { + currSlice->model_number = ue_v("SH: cabac_init_idc", currStream); + } + else + { + currSlice->model_number = 0; + } + + currSlice->slice_qp_delta = val = se_v("SH: slice_qp_delta", currStream); + currSlice->qp = p_Vid->qp = 26 + p_Vid->active_pps->pic_init_qp_minus26 + val; + + if ((p_Vid->qp < -p_Vid->bitdepth_luma_qp_scale) || (p_Vid->qp 
> 51)) + error ("slice_qp_delta makes slice_qp_y out of range", 500); + + if(p_Vid->type==SP_SLICE || p_Vid->type == SI_SLICE) + { + if(p_Vid->type==SP_SLICE) + { + p_Vid->sp_switch = u_1 ("SH: sp_for_switch_flag", currStream); + } + currSlice->slice_qs_delta = val = se_v("SH: slice_qs_delta", currStream); + currSlice->qs = 26 + p_Vid->active_pps->pic_init_qs_minus26 + val; + if ((currSlice->qs < 0) || (currSlice->qs > 51)) + error ("slice_qs_delta makes slice_qs_y out of range", 500); + } + + if ( !HI_INTRA_ONLY_PROFILE || (HI_INTRA_ONLY_PROFILE && (p_Inp->intra_profile_deblocking == 1) )) + //then read flags and parameters from bistream + { + if (p_Vid->active_pps->deblocking_filter_control_present_flag) + { + currSlice->DFDisableIdc = (short)ue_v ("SH: disable_deblocking_filter_idc", currStream); + + if (currSlice->DFDisableIdc!=1) + { + currSlice->DFAlphaC0Offset = (short) (2 * se_v("SH: slice_alpha_c0_offset_div2", currStream)); + currSlice->DFBetaOffset = (short) (2 * se_v("SH: slice_beta_offset_div2", currStream)); + } + else + { + currSlice->DFAlphaC0Offset = currSlice->DFBetaOffset = 0; + } + } + else + { + currSlice->DFDisableIdc = currSlice->DFAlphaC0Offset = currSlice->DFBetaOffset = 0; + } + } + else //By default the Loop Filter is Off + { //444_TEMP_NOTE: change made below. 08/07/07 + //still need to parse the SEs (read flags and parameters from bistream) but will ignore + if (p_Vid->active_pps->deblocking_filter_control_present_flag) + { + currSlice->DFDisableIdc = (short) ue_v ("SH: disable_deblocking_filter_idc", currStream); + + if (currSlice->DFDisableIdc!=1) + { + currSlice->DFAlphaC0Offset = (short) (2 * se_v("SH: slice_alpha_c0_offset_div2", currStream)); + currSlice->DFBetaOffset = (short) (2 * se_v("SH: slice_beta_offset_div2", currStream)); + } + }//444_TEMP_NOTE. the end of change. 
08/07/07 + //Ignore the SEs, by default the Loop Filter is Off + currSlice->DFDisableIdc =1; + currSlice->DFAlphaC0Offset = currSlice->DFBetaOffset = 0; + } + + + if (p_Vid->active_pps->num_slice_groups_minus1>0 && p_Vid->active_pps->slice_group_map_type>=3 && + p_Vid->active_pps->slice_group_map_type<=5) + { + len = (active_sps->pic_height_in_map_units_minus1+1)*(active_sps->pic_width_in_mbs_minus1+1)/ + (p_Vid->active_pps->slice_group_change_rate_minus1+1); + if (((active_sps->pic_height_in_map_units_minus1+1)*(active_sps->pic_width_in_mbs_minus1+1))% + (p_Vid->active_pps->slice_group_change_rate_minus1+1)) + len +=1; + + len = CeilLog2(len+1); + + p_Vid->slice_group_change_cycle = u_v (len, "SH: slice_group_change_cycle", currStream); + } + p_Vid->PicHeightInMbs = p_Vid->FrameHeightInMbs / ( 1 + p_Vid->field_pic_flag ); + p_Vid->PicSizeInMbs = p_Vid->PicWidthInMbs * p_Vid->PicHeightInMbs; + p_Vid->FrameSizeInMbs = p_Vid->PicWidthInMbs * p_Vid->FrameHeightInMbs; +} + + +/*! + ************************************************************************ + * \brief + * read the reference picture reordering information + ************************************************************************ + */ +static void ref_pic_list_reordering(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + byte dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + int i, val; + + alloc_ref_pic_list_reordering_buffer(currSlice); + + if (p_Vid->type!=I_SLICE && p_Vid->type!=SI_SLICE) + { + val = currSlice->ref_pic_list_reordering_flag_l0 = u_1 ("SH: ref_pic_list_reordering_flag_l0", currStream); + + if (val) + { + i=0; + do + { + val = currSlice->reordering_of_pic_nums_idc_l0[i] = ue_v("SH: reordering_of_pic_nums_idc_l0", currStream); + if (val==0 || val==1) + { + currSlice->abs_diff_pic_num_minus1_l0[i] = ue_v("SH: abs_diff_pic_num_minus1_l0", currStream); + } + 
else + { + if (val==2) + { + currSlice->long_term_pic_idx_l0[i] = ue_v("SH: long_term_pic_idx_l0", currStream); + } + } + i++; + // assert (i>currSlice->num_ref_idx_l0_active); + } while (val != 3); + } + } + + if (p_Vid->type==B_SLICE) + { + val = currSlice->ref_pic_list_reordering_flag_l1 = u_1 ("SH: ref_pic_list_reordering_flag_l1", currStream); + + if (val) + { + i=0; + do + { + val = currSlice->reordering_of_pic_nums_idc_l1[i] = ue_v("SH: reordering_of_pic_nums_idc_l1", currStream); + if (val==0 || val==1) + { + currSlice->abs_diff_pic_num_minus1_l1[i] = ue_v("SH: abs_diff_pic_num_minus1_l1", currStream); + } + else + { + if (val==2) + { + currSlice->long_term_pic_idx_l1[i] = ue_v("SH: long_term_pic_idx_l1", currStream); + } + } + i++; + // assert (i>currSlice->num_ref_idx_l1_active); + } while (val != 3); + } + } + + // set reference index of redundant slices. + if(p_Vid->redundant_pic_cnt && (p_Vid->type != I_SLICE) ) + { + p_Vid->redundant_slice_ref_idx = currSlice->abs_diff_pic_num_minus1_l0[0] + 1; + } +} + + +static void reset_wp_params(Slice *currSlice) +{ + int i,comp; + int log_weight_denom; + + for (i=0; i<MAX_REFERENCE_PICTURES; i++) + { + for (comp=0; comp<3; comp++) + { + log_weight_denom = (comp == 0) ? currSlice->luma_log2_weight_denom : currSlice->chroma_log2_weight_denom; + currSlice->wp_weight[0][i][comp] = 1 << log_weight_denom; + currSlice->wp_weight[1][i][comp] = 1 << log_weight_denom; + } + } +} + +/*! 
+ ************************************************************************ + * \brief + * read the weighted prediction tables + ************************************************************************ + */ +static void pred_weight_table(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + byte dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + int luma_weight_flag_l0, luma_weight_flag_l1, chroma_weight_flag_l0, chroma_weight_flag_l1; + int i,j; + + currSlice->luma_log2_weight_denom = ue_v ("SH: luma_log2_weight_denom", currStream); + currSlice->wp_round_luma = currSlice->luma_log2_weight_denom ? 1<<(currSlice->luma_log2_weight_denom - 1): 0; + + if ( 0 != active_sps->chroma_format_idc) + { + currSlice->chroma_log2_weight_denom = ue_v ("SH: chroma_log2_weight_denom", currStream); + currSlice->wp_round_chroma = currSlice->chroma_log2_weight_denom ? 
1<<(currSlice->chroma_log2_weight_denom - 1): 0; + } + + reset_wp_params(currSlice); + + for (i=0; i<currSlice->num_ref_idx_l0_active; i++) + { + luma_weight_flag_l0 = u_1("SH: luma_weight_flag_l0", currStream); + + if (luma_weight_flag_l0) + { + currSlice->wp_weight[0][i][0] = se_v ("SH: luma_weight_l0", currStream); + currSlice->wp_offset[0][i][0] = se_v ("SH: luma_offset_l0", currStream); + currSlice->wp_offset[0][i][0] = currSlice->wp_offset[0][i][0]<<(p_Vid->bitdepth_luma - 8); + } + else + { + currSlice->wp_weight[0][i][0] = 1 << currSlice->luma_log2_weight_denom; + currSlice->wp_offset[0][i][0] = 0; + } + + if (active_sps->chroma_format_idc != 0) + { + chroma_weight_flag_l0 = u_1 ("SH: chroma_weight_flag_l0", currStream); + + for (j=1; j<3; j++) + { + if (chroma_weight_flag_l0) + { + currSlice->wp_weight[0][i][j] = se_v("SH: chroma_weight_l0", currStream); + currSlice->wp_offset[0][i][j] = se_v("SH: chroma_offset_l0", currStream); + currSlice->wp_offset[0][i][j] = currSlice->wp_offset[0][i][j]<<(p_Vid->bitdepth_chroma-8); + } + else + { + currSlice->wp_weight[0][i][j] = 1<<currSlice->chroma_log2_weight_denom; + currSlice->wp_offset[0][i][j] = 0; + } + } + } + } + if ((p_Vid->type == B_SLICE) && p_Vid->active_pps->weighted_bipred_idc == 1) + { + for (i=0; i<currSlice->num_ref_idx_l1_active; i++) + { + luma_weight_flag_l1 = u_1("SH: luma_weight_flag_l1", currStream); + + if (luma_weight_flag_l1) + { + currSlice->wp_weight[1][i][0] = se_v ("SH: luma_weight_l1", currStream); + currSlice->wp_offset[1][i][0] = se_v ("SH: luma_offset_l1", currStream); + currSlice->wp_offset[1][i][0] = currSlice->wp_offset[1][i][0]<<(p_Vid->bitdepth_luma-8); + } + else + { + currSlice->wp_weight[1][i][0] = 1<<currSlice->luma_log2_weight_denom; + currSlice->wp_offset[1][i][0] = 0; + } + + if (active_sps->chroma_format_idc != 0) + { + chroma_weight_flag_l1 = u_1 ("SH: chroma_weight_flag_l1", currStream); + + for (j=1; j<3; j++) + { + if (chroma_weight_flag_l1) + { + 
currSlice->wp_weight[1][i][j] = se_v("SH: chroma_weight_l1", currStream); + currSlice->wp_offset[1][i][j] = se_v("SH: chroma_offset_l1", currStream); + currSlice->wp_offset[1][i][j] = currSlice->wp_offset[1][i][j]<<(p_Vid->bitdepth_chroma-8); + } + else + { + currSlice->wp_weight[1][i][j] = 1<<currSlice->chroma_log2_weight_denom; + currSlice->wp_offset[1][i][j] = 0; + } + } + } + } + } +} + + +/*! + ************************************************************************ + * \brief + * read the memory control operations + ************************************************************************ + */ +void dec_ref_pic_marking(VideoParameters *p_Vid, Bitstream *currStream) +{ + int val; + + DecRefPicMarking_t *tmp_drpm,*tmp_drpm2; + + // free old buffer content + while (p_Vid->dec_ref_pic_marking_buffer) + { + tmp_drpm=p_Vid->dec_ref_pic_marking_buffer; + + p_Vid->dec_ref_pic_marking_buffer=tmp_drpm->Next; + free (tmp_drpm); + } + + if (p_Vid->idr_flag) + { + p_Vid->no_output_of_prior_pics_flag = u_1("SH: no_output_of_prior_pics_flag", currStream); + p_Vid->long_term_reference_flag = u_1("SH: long_term_reference_flag", currStream); + } + else + { + p_Vid->adaptive_ref_pic_buffering_flag = u_1("SH: adaptive_ref_pic_buffering_flag", currStream); + if (p_Vid->adaptive_ref_pic_buffering_flag) + { + // read Memory Management Control Operation + do + { + tmp_drpm=(DecRefPicMarking_t*)calloc (1,sizeof (DecRefPicMarking_t)); + tmp_drpm->Next=NULL; + + val = tmp_drpm->memory_management_control_operation = ue_v("SH: memory_management_control_operation", currStream); + + if ((val==1)||(val==3)) + { + tmp_drpm->difference_of_pic_nums_minus1 = ue_v("SH: difference_of_pic_nums_minus1", currStream); + } + if (val==2) + { + tmp_drpm->long_term_pic_num = ue_v("SH: long_term_pic_num", currStream); + } + + if ((val==3)||(val==6)) + { + tmp_drpm->long_term_frame_idx = ue_v("SH: long_term_frame_idx", currStream); + } + if (val==4) + { + tmp_drpm->max_long_term_frame_idx_plus1 = ue_v("SH: 
max_long_term_pic_idx_plus1", currStream); + } + + // add command + if (p_Vid->dec_ref_pic_marking_buffer==NULL) + { + p_Vid->dec_ref_pic_marking_buffer=tmp_drpm; + } + else + { + tmp_drpm2=p_Vid->dec_ref_pic_marking_buffer; + while (tmp_drpm2->Next!=NULL) tmp_drpm2=tmp_drpm2->Next; + tmp_drpm2->Next=tmp_drpm; + } + + } + while (val != 0); + } + } +} + +/*! + ************************************************************************ + * \brief + * To calculate the poc values + * based upon JVT-F100d2 + * POC200301: Until Jan 2003, this function will calculate the correct POC + * values, but the management of POCs in buffered pictures may need more work. + * \return + * none + ************************************************************************ + */ +void decode_poc(VideoParameters *p_Vid) +{ + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + int i; + // for POC mode 0: + unsigned int MaxPicOrderCntLsb = (1<<(active_sps->log2_max_pic_order_cnt_lsb_minus4+4)); + + switch ( active_sps->pic_order_cnt_type ) + { + case 0: // POC MODE 0 + // 1st + if(p_Vid->idr_flag) + { + p_Vid->PrevPicOrderCntMsb = 0; + p_Vid->PrevPicOrderCntLsb = 0; + } + else + { + if (p_Vid->last_has_mmco_5) + { + if (p_Vid->last_pic_bottom_field) + { + p_Vid->PrevPicOrderCntMsb = 0; + p_Vid->PrevPicOrderCntLsb = 0; + } + else + { + p_Vid->PrevPicOrderCntMsb = 0; + p_Vid->PrevPicOrderCntLsb = p_Vid->toppoc; + } + } + } + // Calculate the MSBs of current picture + if( p_Vid->pic_order_cnt_lsb < p_Vid->PrevPicOrderCntLsb && + ( p_Vid->PrevPicOrderCntLsb - p_Vid->pic_order_cnt_lsb ) >= ( MaxPicOrderCntLsb / 2 ) ) + p_Vid->PicOrderCntMsb = p_Vid->PrevPicOrderCntMsb + MaxPicOrderCntLsb; + else if ( p_Vid->pic_order_cnt_lsb > p_Vid->PrevPicOrderCntLsb && + ( p_Vid->pic_order_cnt_lsb - p_Vid->PrevPicOrderCntLsb ) > ( MaxPicOrderCntLsb / 2 ) ) + p_Vid->PicOrderCntMsb = p_Vid->PrevPicOrderCntMsb - MaxPicOrderCntLsb; + else + p_Vid->PicOrderCntMsb = p_Vid->PrevPicOrderCntMsb; + + // 2nd + + 
if(p_Vid->field_pic_flag==0) + { //frame pix + p_Vid->toppoc = p_Vid->PicOrderCntMsb + p_Vid->pic_order_cnt_lsb; + p_Vid->bottompoc = p_Vid->toppoc + p_Vid->delta_pic_order_cnt_bottom; + p_Vid->ThisPOC = p_Vid->framepoc = (p_Vid->toppoc < p_Vid->bottompoc)? p_Vid->toppoc : p_Vid->bottompoc; // POC200301 + } + else if (p_Vid->bottom_field_flag == FALSE) + { //top field + p_Vid->ThisPOC= p_Vid->toppoc = p_Vid->PicOrderCntMsb + p_Vid->pic_order_cnt_lsb; + } + else + { //bottom field + p_Vid->ThisPOC= p_Vid->bottompoc = p_Vid->PicOrderCntMsb + p_Vid->pic_order_cnt_lsb; + } + p_Vid->framepoc=p_Vid->ThisPOC; + + if ( p_Vid->frame_num!=p_Vid->PreviousFrameNum) + p_Vid->PreviousFrameNum=p_Vid->frame_num; + + if(p_Vid->nal_reference_idc) + { + p_Vid->PrevPicOrderCntLsb = p_Vid->pic_order_cnt_lsb; + p_Vid->PrevPicOrderCntMsb = p_Vid->PicOrderCntMsb; + } + + break; + + case 1: // POC MODE 1 + // 1st + if(p_Vid->idr_flag) + { + p_Vid->FrameNumOffset=0; // first pix of IDRGOP, + p_Vid->delta_pic_order_cnt[0]=0; //ignore first delta + if(p_Vid->frame_num) + error("frame_num not equal to zero in IDR picture", -1020); + } + else + { + if (p_Vid->last_has_mmco_5) + { + p_Vid->PreviousFrameNumOffset = 0; + p_Vid->PreviousFrameNum = 0; + } + if (p_Vid->frame_num<p_Vid->PreviousFrameNum) + { //not first pix of IDRGOP + p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset + p_Vid->MaxFrameNum; + } + else + { + p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset; + } + } + + // 2nd + if(active_sps->num_ref_frames_in_pic_order_cnt_cycle) + p_Vid->AbsFrameNum = p_Vid->FrameNumOffset+p_Vid->frame_num; + else + p_Vid->AbsFrameNum=0; + if( (!p_Vid->nal_reference_idc) && p_Vid->AbsFrameNum > 0) + p_Vid->AbsFrameNum--; + + // 3rd + p_Vid->ExpectedDeltaPerPicOrderCntCycle=0; + + if(active_sps->num_ref_frames_in_pic_order_cnt_cycle) + for(i=0;i<(int) active_sps->num_ref_frames_in_pic_order_cnt_cycle;i++) + p_Vid->ExpectedDeltaPerPicOrderCntCycle += active_sps->offset_for_ref_frame[i]; + + 
if(p_Vid->AbsFrameNum) + { + p_Vid->PicOrderCntCycleCnt = (p_Vid->AbsFrameNum-1)/active_sps->num_ref_frames_in_pic_order_cnt_cycle; + p_Vid->FrameNumInPicOrderCntCycle = (p_Vid->AbsFrameNum-1)%active_sps->num_ref_frames_in_pic_order_cnt_cycle; + p_Vid->ExpectedPicOrderCnt = p_Vid->PicOrderCntCycleCnt*p_Vid->ExpectedDeltaPerPicOrderCntCycle; + for(i=0;i<=(int)p_Vid->FrameNumInPicOrderCntCycle;i++) + p_Vid->ExpectedPicOrderCnt += active_sps->offset_for_ref_frame[i]; + } + else + p_Vid->ExpectedPicOrderCnt=0; + + if(!p_Vid->nal_reference_idc) + p_Vid->ExpectedPicOrderCnt += active_sps->offset_for_non_ref_pic; + + if(p_Vid->field_pic_flag==0) + { //frame pix + p_Vid->toppoc = p_Vid->ExpectedPicOrderCnt + p_Vid->delta_pic_order_cnt[0]; + p_Vid->bottompoc = p_Vid->toppoc + active_sps->offset_for_top_to_bottom_field + p_Vid->delta_pic_order_cnt[1]; + p_Vid->ThisPOC = p_Vid->framepoc = (p_Vid->toppoc < p_Vid->bottompoc)? p_Vid->toppoc : p_Vid->bottompoc; // POC200301 + } + else if (p_Vid->bottom_field_flag == FALSE) + { //top field + p_Vid->ThisPOC = p_Vid->toppoc = p_Vid->ExpectedPicOrderCnt + p_Vid->delta_pic_order_cnt[0]; + } + else + { //bottom field + p_Vid->ThisPOC = p_Vid->bottompoc = p_Vid->ExpectedPicOrderCnt + active_sps->offset_for_top_to_bottom_field + p_Vid->delta_pic_order_cnt[0]; + } + p_Vid->framepoc=p_Vid->ThisPOC; + + p_Vid->PreviousFrameNum=p_Vid->frame_num; + p_Vid->PreviousFrameNumOffset=p_Vid->FrameNumOffset; + + break; + + + case 2: // POC MODE 2 + if(p_Vid->idr_flag) // IDR picture + { + p_Vid->FrameNumOffset=0; // first pix of IDRGOP, + p_Vid->ThisPOC = p_Vid->framepoc = p_Vid->toppoc = p_Vid->bottompoc = 0; + if(p_Vid->frame_num) + error("frame_num not equal to zero in IDR picture", -1020); + } + else + { + if (p_Vid->last_has_mmco_5) + { + p_Vid->PreviousFrameNum = 0; + p_Vid->PreviousFrameNumOffset = 0; + } + if (p_Vid->frame_num<p_Vid->PreviousFrameNum) + p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset + p_Vid->MaxFrameNum; + else + 
p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset; + + + p_Vid->AbsFrameNum = p_Vid->FrameNumOffset+p_Vid->frame_num; + if(!p_Vid->nal_reference_idc) + p_Vid->ThisPOC = (2*p_Vid->AbsFrameNum - 1); + else + p_Vid->ThisPOC = (2*p_Vid->AbsFrameNum); + + if (p_Vid->field_pic_flag==0) + p_Vid->toppoc = p_Vid->bottompoc = p_Vid->framepoc = p_Vid->ThisPOC; + else if (p_Vid->bottom_field_flag == FALSE) + p_Vid->toppoc = p_Vid->framepoc = p_Vid->ThisPOC; + else p_Vid->bottompoc = p_Vid->framepoc = p_Vid->ThisPOC; + } + + p_Vid->PreviousFrameNum=p_Vid->frame_num; + p_Vid->PreviousFrameNumOffset=p_Vid->FrameNumOffset; + break; + + + default: + //error must occurs + assert( 1==0 ); + break; + } +} + +/*! + ************************************************************************ + * \brief + * A little helper for the debugging of POC code + * \return + * none + ************************************************************************ + */ +int dumppoc(VideoParameters *p_Vid) +{ + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + printf ("\nPOC locals...\n"); + printf ("toppoc %d\n", (int) p_Vid->toppoc); + printf ("bottompoc %d\n", (int) p_Vid->bottompoc); + printf ("frame_num %d\n", (int) p_Vid->frame_num); + printf ("field_pic_flag %d\n", (int) p_Vid->field_pic_flag); + printf ("bottom_field_flag %d\n", (int) p_Vid->bottom_field_flag); + printf ("POC SPS\n"); + printf ("log2_max_frame_num_minus4 %d\n", (int) active_sps->log2_max_frame_num_minus4); // POC200301 + printf ("log2_max_pic_order_cnt_lsb_minus4 %d\n", (int) active_sps->log2_max_pic_order_cnt_lsb_minus4); + printf ("pic_order_cnt_type %d\n", (int) active_sps->pic_order_cnt_type); + printf ("num_ref_frames_in_pic_order_cnt_cycle %d\n", (int) active_sps->num_ref_frames_in_pic_order_cnt_cycle); + printf ("delta_pic_order_always_zero_flag %d\n", (int) active_sps->delta_pic_order_always_zero_flag); + printf ("offset_for_non_ref_pic %d\n", (int) active_sps->offset_for_non_ref_pic); + printf 
("offset_for_top_to_bottom_field %d\n", (int) active_sps->offset_for_top_to_bottom_field); + printf ("offset_for_ref_frame[0] %d\n", (int) active_sps->offset_for_ref_frame[0]); + printf ("offset_for_ref_frame[1] %d\n", (int) active_sps->offset_for_ref_frame[1]); + printf ("POC in SLice Header\n"); + printf ("bottom_field_pic_order_in_frame_present_flag %d\n", (int) p_Vid->active_pps->bottom_field_pic_order_in_frame_present_flag); + printf ("delta_pic_order_cnt[0] %d\n", (int) p_Vid->delta_pic_order_cnt[0]); + printf ("delta_pic_order_cnt[1] %d\n", (int) p_Vid->delta_pic_order_cnt[1]); + printf ("delta_pic_order_cnt[2] %d\n", (int) p_Vid->delta_pic_order_cnt[2]); + printf ("idr_flag %d\n", (int) p_Vid->idr_flag); + printf ("MaxFrameNum %d\n", (int) p_Vid->MaxFrameNum); + + return 0; +} + +/*! + ************************************************************************ + * \brief + * return the poc of p_Vid as per (8-1) JVT-F100d2 + * POC200301 + ************************************************************************ + */ +int picture_order(VideoParameters *p_Vid) +{ + if (p_Vid->field_pic_flag==0) // is a frame + return p_Vid->framepoc; + else if (p_Vid->bottom_field_flag == FALSE) // top field + return p_Vid->toppoc; + else // bottom field + return p_Vid->bottompoc; +} + diff --git a/Src/h264dec/ldecod/src/image.c b/Src/h264dec/ldecod/src/image.c new file mode 100644 index 00000000..a75ff7ea --- /dev/null +++ b/Src/h264dec/ldecod/src/image.c @@ -0,0 +1,1699 @@ + +/*! 
+ *********************************************************************** + * \file image.c + * + * \brief + * Decode a Slice + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langoy <inge.lille-langoy@telenor.com> + * - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se> + * - Jani Lainema <jani.lainema@nokia.com> + * - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de> + * - Byeong-Moon Jeon <jeonbm@lge.com> + * - Thomas Wedi <wedi@tnt.uni-hannover.de> + * - Gabi Blaettermann + * - Ye-Kui Wang <wyk@ieee.org> + * - Antti Hallapuro <antti.hallapuro@nokia.com> + * - Alexis Tourapis <alexismt@ieee.org> + * - Jill Boyce <jill.boyce@thomson.net> + * - Saurav K Bandyopadhyay <saurav@ieee.org> + * - Zhenyu Wu <Zhenyu.Wu@thomson.net + * - Purvin Pandit <Purvin.Pandit@thomson.net> + * + *********************************************************************** + */ + +#include "contributors.h" + +#include <math.h> +#include <limits.h> + +#include "global.h" +#include "image.h" +#include "fmo.h" +#include "nalu.h" +#include "parset.h" +#include "header.h" + +#include "sei.h" +#include "output.h" +#include "mb_access.h" +#include "memalloc.h" +#include "macroblock.h" + +#include "loopfilter.h" + +#include "biaridecod.h" +#include "context_ini.h" +#include "cabac.h" +#include "vlc.h" +#include "quant.h" + +#include "errorconcealment.h" +#include "erc_api.h" + +/*! + ************************************************************************ + * \brief + * checks if the System is big- or little-endian + * \return + * 0, little-endian (e.g. Intel architectures) + * 1, big-endian (e.g. SPARC, MIPS, PowerPC) + ************************************************************************ + */ +int testEndian(void) +{ + short s; + byte *p; + + p=(byte*)&s; + + s=1; + + return (*p==0); +} + +static int read_new_slice(Slice *currSlice, uint64_t time_code); +/*! 
+ ************************************************************************ + * \brief + * Initializes the parameters for a new picture + ************************************************************************ + */ +// benski> time_code is user-passed value +static void init_picture(VideoParameters *p_Vid, Slice *currSlice, InputParameters *p_Inp, uint64_t time_code) +{ + int i; + int nplane; + + if (p_Vid->dec_picture) + { + // this may only happen on slice loss + exit_picture(p_Vid, &p_Vid->dec_picture); + } + if (p_Vid->recovery_point) + p_Vid->recovery_frame_num = (p_Vid->frame_num + p_Vid->recovery_frame_cnt) % p_Vid->MaxFrameNum; + + if (p_Vid->idr_flag) + p_Vid->recovery_frame_num = p_Vid->frame_num; + + if (p_Vid->recovery_point == 0 && + p_Vid->pre_frame_num != INT_MIN && + p_Vid->frame_num != p_Vid->pre_frame_num && + p_Vid->frame_num != (p_Vid->pre_frame_num + 1) % p_Vid->MaxFrameNum) + { + if (p_Vid->active_sps->gaps_in_frame_num_value_allowed_flag == 0) + { + /* Advanced Error Concealment would be called here to combat unintentional loss of pictures. */ + error("An unintentional loss of pictures occurs! 
Exit\n", 100); + + } + fill_frame_num_gap(p_Vid); + } + + if(p_Vid->nal_reference_idc) + { + p_Vid->pre_frame_num = p_Vid->frame_num; + } + + p_Vid->num_dec_mb = 0; + + //calculate POC + decode_poc(p_Vid); + + if (p_Vid->recovery_frame_num == p_Vid->frame_num && + p_Vid->recovery_poc == 0x7fffffff) + p_Vid->recovery_poc = p_Vid->framepoc; + + if(p_Vid->nal_reference_idc) + p_Vid->last_ref_pic_poc = p_Vid->framepoc; + + // dumppoc (p_Vid); + + p_Vid->dec_picture = alloc_storable_picture (p_Vid, (PictureStructure) p_Vid->structure, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr); + p_Vid->dec_picture->time_code = time_code; + p_Vid->dec_picture->top_poc=p_Vid->toppoc; + p_Vid->dec_picture->bottom_poc=p_Vid->bottompoc; + p_Vid->dec_picture->frame_poc=p_Vid->framepoc; + p_Vid->dec_picture->qp = p_Vid->qp; + p_Vid->dec_picture->slice_qp_delta = currSlice->slice_qp_delta; + p_Vid->dec_picture->chroma_qp_offset[0] = p_Vid->active_pps->chroma_qp_index_offset; + p_Vid->dec_picture->chroma_qp_offset[1] = p_Vid->active_pps->second_chroma_qp_index_offset; + + // reset all variables of the error concealment instance before decoding of every frame. + // here the third parameter should, if perfectly, be equal to the number of slices per frame. 
+ // using little value is ok, the code will allocate more memory if the slice number is larger + ercReset(p_Vid->erc_errorVar, p_Vid->PicSizeInMbs, p_Vid->PicSizeInMbs, p_Vid->dec_picture->size_x); + p_Vid->erc_mvperMB = 0; + + switch (p_Vid->structure ) + { + case TOP_FIELD: + { + p_Vid->dec_picture->poc=p_Vid->toppoc; + p_Vid->number *= 2; + break; + } + case BOTTOM_FIELD: + { + p_Vid->dec_picture->poc=p_Vid->bottompoc; + p_Vid->number = p_Vid->number * 2 + 1; + break; + } + case FRAME: + { + p_Vid->dec_picture->poc=p_Vid->framepoc; + break; + } + default: + error("p_Vid->structure not initialized", 235); + } + + p_Vid->current_slice_nr=0; + + if (p_Vid->type > SI_SLICE) + { + set_ec_flag(p_Vid, SE_PTYPE); + p_Vid->type = P_SLICE; // concealed element + } + + // CAVLC init + if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC) + { + memset(p_Vid->nz_coeff[0], -1, p_Vid->PicSizeInMbs * 48 *sizeof(byte)); // 3 * 4 * 4 + } + + if(p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0; i<(int)p_Vid->PicSizeInMbs; ++i) + { + p_Vid->intra_block[i] = 1; + } + } + + // Set the slice_nr member of each MB to -1, to ensure correct when packet loss occurs + // TO set Macroblock Map (mark all MBs as 'have to be concealed') + if( IS_INDEPENDENT(p_Vid) ) + { + for( nplane=0; nplane<MAX_PLANE; ++nplane ) + { + for(i=0; i<(int)p_Vid->PicSizeInMbs; ++i) + { + p_Vid->mb_data_JV[nplane][i].slice_nr = -1; + p_Vid->mb_data_JV[nplane][i].ei_flag = 1; + p_Vid->mb_data_JV[nplane][i].dpl_flag = 0; + } + } + } + else + { + for(i=0; i<(int)p_Vid->PicSizeInMbs; ++i) + { + p_Vid->mb_data[i].slice_nr = -1; + p_Vid->mb_data[i].ei_flag = 1; + p_Vid->mb_data[i].dpl_flag = 0; + } + } + + p_Vid->dec_picture->slice_type = p_Vid->type; + p_Vid->dec_picture->used_for_reference = (p_Vid->nal_reference_idc != 0); + p_Vid->dec_picture->idr_flag = p_Vid->idr_flag; + p_Vid->dec_picture->no_output_of_prior_pics_flag = p_Vid->no_output_of_prior_pics_flag; + 
p_Vid->dec_picture->long_term_reference_flag = p_Vid->long_term_reference_flag; + p_Vid->dec_picture->adaptive_ref_pic_buffering_flag = p_Vid->adaptive_ref_pic_buffering_flag; + + p_Vid->dec_picture->dec_ref_pic_marking_buffer = p_Vid->dec_ref_pic_marking_buffer; + p_Vid->dec_ref_pic_marking_buffer = NULL; + + p_Vid->dec_picture->mb_aff_frame_flag = p_Vid->mb_aff_frame_flag; + p_Vid->dec_picture->PicWidthInMbs = p_Vid->PicWidthInMbs; + + if (p_Vid->dec_picture->mb_aff_frame_flag) + { + p_Vid->get_mb_block_pos = get_mb_block_pos_mbaff; + p_Vid->getNeighbour = getAffNeighbour; + p_Vid->getNeighbourXP_NoPos = getAffNeighbour; + p_Vid->getNeighbourPX_NoPos = getAffNeighbour; + p_Vid->getNeighbourLuma = getAffNeighbourLuma; + p_Vid->getNeighbourPXLuma = getAffNeighbourLuma; + p_Vid->getNeighbourXPLuma = getAffNeighbourXPLuma; + p_Vid->getNeighbourLeftLuma = getAffNeighbourN0Luma; + p_Vid->getNeighbourNXLuma = getAffNeighbourNXLuma; + p_Vid->getNeighbourLeft = getAffNeighbourN0; + p_Vid->getNeighbourUp = getAffNeighbour0N; + p_Vid->getNeighbourUpLuma = getAffNeighbour0NLuma; + p_Vid->getNeighbourNX = getAffNeighbourNX; + p_Vid->getNeighbourNP = getAffNeighbourNX; + p_Vid->getNeighbourNPChromaNB = getAffNeighbourNX; + p_Vid->getNeighbour0X = getAffNeighbour0X; + p_Vid->getNeighbour0XLuma = getAffNeighbour0XLuma; + p_Vid->getNeighbourX0 = getAffNeighbourX0; + p_Vid->getNeighbourNPLumaNB = getAffNeighbourNPLuma; + p_Vid->getNeighbourPXLumaNB = getAffNeighbourPXLumaNB; + p_Vid->getNeighbourPXLumaNB_NoPos = getAffNeighbourPXLumaNB_NoPos; + p_Vid->getNeighbourXPLumaNB = getAffNeighbourXPLuma; + p_Vid->getNeighbourPPLumaNB = getAffNeighbourPPLumaNB; + p_Vid->getNeighbourXPLumaNB_NoPos = getAffNeighbourXPLuma; + } + else + { + p_Vid->get_mb_block_pos = get_mb_block_pos_normal; + p_Vid->getNeighbour = getNonAffNeighbour; + p_Vid->getNeighbourXP_NoPos = getNonAffNeighbourXP_NoPos; + p_Vid->getNeighbourPX_NoPos = getNonAffNeighbourPX_NoPos; + p_Vid->getNeighbourLuma = 
getNonAffNeighbourLuma; + p_Vid->getNeighbourPXLuma = getNonAffNeighbourPXLuma; + p_Vid->getNeighbourXPLuma = getNonAffNeighbourXPLuma; + p_Vid->getNeighbourLeftLuma = getNonAffNeighbourN0Luma; + p_Vid->getNeighbourNXLuma = getNonAffNeighbourNXLuma; + p_Vid->getNeighbourLeft = getNonAffNeighbourN0; + p_Vid->getNeighbourUp = getNonAffNeighbour0N; + p_Vid->getNeighbourUpLuma = getNonAffNeighbour0NLuma; + p_Vid->getNeighbourNX = getNonAffNeighbourNX; + p_Vid->getNeighbourNP = getNonAffNeighbourNP; + p_Vid->getNeighbourNPChromaNB = getNonAffNeighbourNPChromaNB; + p_Vid->getNeighbour0X = getNonAffNeighbour0X; + p_Vid->getNeighbour0XLuma = getNonAffNeighbour0XLuma; + p_Vid->getNeighbourX0 = getNonAffNeighbourX0; + p_Vid->getNeighbourNPLumaNB = getNonAffNeighbourNPLumaNB; + p_Vid->getNeighbourPXLumaNB = getNonAffNeighbourPXLumaNB; + p_Vid->getNeighbourPXLumaNB_NoPos = getNonAffNeighbourPXLumaNB_NoPos; + p_Vid->getNeighbourXPLumaNB = getNonAffNeighbourXPLumaNB; + p_Vid->getNeighbourPPLumaNB = getNonAffNeighbourPPLumaNB; + p_Vid->getNeighbourXPLumaNB_NoPos = getNonAffNeighbourXPLumaNB_NoPos; + } + + p_Vid->dec_picture->pic_num = p_Vid->frame_num; + p_Vid->dec_picture->frame_num = p_Vid->frame_num; + + p_Vid->dec_picture->recovery_frame = (unsigned int) (p_Vid->frame_num == p_Vid->recovery_frame_num); + + p_Vid->dec_picture->coded_frame = (p_Vid->structure==FRAME); + + p_Vid->dec_picture->chroma_format_idc = p_Vid->active_sps->chroma_format_idc; + + p_Vid->dec_picture->frame_mbs_only_flag = p_Vid->active_sps->frame_mbs_only_flag; + p_Vid->dec_picture->frame_cropping_flag = p_Vid->active_sps->frame_cropping_flag; + + if (p_Vid->dec_picture->frame_cropping_flag) + { + p_Vid->dec_picture->frame_cropping_rect_left_offset = p_Vid->active_sps->frame_cropping_rect_left_offset; + p_Vid->dec_picture->frame_cropping_rect_right_offset = p_Vid->active_sps->frame_cropping_rect_right_offset; + p_Vid->dec_picture->frame_cropping_rect_top_offset = 
p_Vid->active_sps->frame_cropping_rect_top_offset; + p_Vid->dec_picture->frame_cropping_rect_bottom_offset = p_Vid->active_sps->frame_cropping_rect_bottom_offset; + } + +#if (ENABLE_OUTPUT_TONEMAPPING) + // store the necessary tone mapping sei into StorablePicture structure + p_Vid->dec_picture->seiHasTone_mapping = 0; + + if (p_Vid->seiToneMapping->seiHasTone_mapping) + { + p_Vid->dec_picture->seiHasTone_mapping = 1; + p_Vid->dec_picture->tone_mapping_model_id = p_Vid->seiToneMapping->model_id; + p_Vid->dec_picture->tonemapped_bit_depth = p_Vid->seiToneMapping->sei_bit_depth; + p_Vid->dec_picture->tone_mapping_lut = malloc(sizeof(int)*(1<<p_Vid->seiToneMapping->coded_data_bit_depth)); + if (NULL == p_Vid->dec_picture->tone_mapping_lut) + { + no_mem_exit("init_picture: tone_mapping_lut"); + } + memcpy(p_Vid->dec_picture->tone_mapping_lut, p_Vid->seiToneMapping->lut, sizeof(imgpel)*(1<<p_Vid->seiToneMapping->coded_data_bit_depth)); + update_tone_mapping_sei(p_Vid->seiToneMapping); + } +#endif + + if( IS_INDEPENDENT(p_Vid) ) + { + p_Vid->dec_picture_JV[0] = p_Vid->dec_picture; + p_Vid->dec_picture_JV[1] = alloc_storable_picture (p_Vid, (PictureStructure) p_Vid->structure, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr); + copy_dec_picture_JV( p_Vid, p_Vid->dec_picture_JV[1], p_Vid->dec_picture_JV[0] ); + p_Vid->dec_picture_JV[2] = alloc_storable_picture (p_Vid, (PictureStructure) p_Vid->structure, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr); + copy_dec_picture_JV( p_Vid, p_Vid->dec_picture_JV[2], p_Vid->dec_picture_JV[0] ); + } +} + +void MbAffPostProc(VideoParameters *p_Vid) +{ + imgpel temp[32][16]; + + StorablePicture *dec_picture = p_Vid->dec_picture; + imgpel ** imgY = dec_picture->imgY->img; + imgpel **imgUV[2] = {dec_picture->imgUV[0]->img, dec_picture->imgUV[1]->img}; + + short i, y, x0, y0, uv; + for (i=0; i<(int)dec_picture->PicSizeInMbs; i+=2) + { + if (dec_picture->motion.mb_field[i]) + { + get_mb_pos(p_Vid, i, 
p_Vid->mb_size[IS_LUMA], &x0, &y0); + for (y=0; y<(2*MB_BLOCK_SIZE);++y) + memcpy(temp[y], &imgY[y0+y][x0], MB_BLOCK_SIZE * sizeof(imgpel)); + + for (y=0; y<MB_BLOCK_SIZE; ++y) + { + memcpy(&imgY[y0+(2*y )][x0], temp[y ], MB_BLOCK_SIZE * sizeof(imgpel)); + memcpy(&imgY[y0+(2*y+1)][x0], temp[y+MB_BLOCK_SIZE], MB_BLOCK_SIZE * sizeof(imgpel)); + } + + + if (dec_picture->chroma_format_idc != YUV400) + { + x0 = (short) (x0 / (16/p_Vid->mb_cr_size_x)); + y0 = (short) (y0 / (16/p_Vid->mb_cr_size_y)); + + for (uv=0; uv<2; ++uv) + { + for (y=0; y<(2*p_Vid->mb_cr_size_y);++y) + memcpy(temp[y], &imgUV[uv][y0+y][x0], p_Vid->mb_cr_size_x * sizeof(imgpel)); + + for (y=0; y<p_Vid->mb_cr_size_y;++y) + { + memcpy(&imgUV[uv][y0+(2*y )][x0], temp[y ], p_Vid->mb_cr_size_x * sizeof(imgpel)); + memcpy(&imgUV[uv][y0+(2*y+1)][x0], temp[y+p_Vid->mb_cr_size_y], p_Vid->mb_cr_size_x * sizeof(imgpel)); + } + } + } + } + } +} + +static void fill_wp_params(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + + int i, j, k; + int comp; + int log_weight_denom; + int tb, td; + int tx,DistScaleFactor; + + int max_l0_ref = currSlice->num_ref_idx_l0_active; + int max_l1_ref = currSlice->num_ref_idx_l1_active; + + if (p_Vid->active_pps->weighted_bipred_idc == 2) + { + currSlice->luma_log2_weight_denom = 5; + currSlice->chroma_log2_weight_denom = 5; + currSlice->wp_round_luma = 16; + currSlice->wp_round_chroma = 16; + + for (i=0; i<MAX_REFERENCE_PICTURES; ++i) + { + for (comp=0; comp<3; ++comp) + { + log_weight_denom = (comp == 0) ? currSlice->luma_log2_weight_denom : currSlice->chroma_log2_weight_denom; + currSlice->wp_weight[0][i][comp] = 1<<log_weight_denom; + currSlice->wp_weight[1][i][comp] = 1<<log_weight_denom; + currSlice->wp_offset[0][i][comp] = 0; + currSlice->wp_offset[1][i][comp] = 0; + } + } + } + + + for (i=0; i<max_l0_ref; ++i) + { + for (j=0; j<max_l1_ref; ++j) + { + for (comp = 0; comp<3; ++comp) + { + log_weight_denom = (comp == 0) ? 
currSlice->luma_log2_weight_denom : currSlice->chroma_log2_weight_denom; + if (p_Vid->active_pps->weighted_bipred_idc == 1) + { + currSlice->wbp_weight[0][i][j][comp] = currSlice->wp_weight[0][i][comp]; + currSlice->wbp_weight[1][i][j][comp] = currSlice->wp_weight[1][j][comp]; + } + else if (p_Vid->active_pps->weighted_bipred_idc == 2) + { + td = iClip3(-128,127,p_Vid->listX[LIST_1][j]->poc - p_Vid->listX[LIST_0][i]->poc); + if (td == 0 || p_Vid->listX[LIST_1][j]->is_long_term || p_Vid->listX[LIST_0][i]->is_long_term) + { + currSlice->wbp_weight[0][i][j][comp] = 32; + currSlice->wbp_weight[1][i][j][comp] = 32; + } + else + { + tb = iClip3(-128,127,p_Vid->ThisPOC - p_Vid->listX[LIST_0][i]->poc); + + tx = (16384 + iabs(td/2))/td; + DistScaleFactor = iClip3(-1024, 1023, (tx*tb + 32 )>>6); + + currSlice->wbp_weight[1][i][j][comp] = DistScaleFactor >> 2; + currSlice->wbp_weight[0][i][j][comp] = 64 - currSlice->wbp_weight[1][i][j][comp]; + if (currSlice->wbp_weight[1][i][j][comp] < -64 || currSlice->wbp_weight[1][i][j][comp] > 128) + { + currSlice->wbp_weight[0][i][j][comp] = 32; + currSlice->wbp_weight[1][i][j][comp] = 32; + currSlice->wp_offset[0][i][comp] = 0; + currSlice->wp_offset[1][j][comp] = 0; + } + } + } + } + } + } + + + if (currSlice->mb_aff_frame_flag) + { + for (i=0; i<2*max_l0_ref; ++i) + { + for (j=0; j<2*max_l1_ref; ++j) + { + for (comp = 0; comp<3; ++comp) + { + for (k=2; k<6; k+=2) + { + currSlice->wp_offset[k+0][i][comp] = currSlice->wp_offset[0][i>>1][comp]; + currSlice->wp_offset[k+1][j][comp] = currSlice->wp_offset[1][j>>1][comp]; + + log_weight_denom = (comp == 0) ? 
currSlice->luma_log2_weight_denom : currSlice->chroma_log2_weight_denom; + if (p_Vid->active_pps->weighted_bipred_idc == 1) + { + currSlice->wbp_weight[k+0][i][j][comp] = currSlice->wp_weight[0][i>>1][comp]; + currSlice->wbp_weight[k+1][i][j][comp] = currSlice->wp_weight[1][j>>1][comp]; + } + else if (p_Vid->active_pps->weighted_bipred_idc == 2) + { + td = iClip3(-128,127,p_Vid->listX[k+LIST_1][j]->poc - p_Vid->listX[k+LIST_0][i]->poc); + if (td == 0 || p_Vid->listX[k+LIST_1][j]->is_long_term || p_Vid->listX[k+LIST_0][i]->is_long_term) + { + currSlice->wbp_weight[k+0][i][j][comp] = 32; + currSlice->wbp_weight[k+1][i][j][comp] = 32; + } + else + { + tb = iClip3(-128,127,((k==2)?p_Vid->toppoc:p_Vid->bottompoc) - p_Vid->listX[k+LIST_0][i]->poc); + + tx = (16384 + iabs(td/2))/td; + DistScaleFactor = iClip3(-1024, 1023, (tx*tb + 32 )>>6); + + currSlice->wbp_weight[k+1][i][j][comp] = DistScaleFactor >> 2; + currSlice->wbp_weight[k+0][i][j][comp] = 64 - currSlice->wbp_weight[k+1][i][j][comp]; + if (currSlice->wbp_weight[k+1][i][j][comp] < -64 || currSlice->wbp_weight[k+1][i][j][comp] > 128) + { + currSlice->wbp_weight[k+1][i][j][comp] = 32; + currSlice->wbp_weight[k+0][i][j][comp] = 32; + currSlice->wp_offset[k+0][i][comp] = 0; + currSlice->wp_offset[k+1][j][comp] = 0; + } + } + } + } + } + } + } + } +} + +static void decode_slice(Slice *currSlice, int current_header) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + + if (p_Vid->active_pps->entropy_coding_mode_flag) + { + init_contexts (currSlice); + cabac_new_slice(currSlice); + } + + if ( (p_Vid->active_pps->weighted_bipred_idc > 0 && (currSlice->slice_type == B_SLICE)) || (p_Vid->active_pps->weighted_pred_flag && currSlice->slice_type !=I_SLICE)) + fill_wp_params(currSlice); + + //printf("frame picture %d %d %d\n",p_Vid->structure,p_Vid->ThisPOC,currSlice->direct_spatial_mv_pred_flag); + + // decode main slice information + if (current_header == SOP || current_header == SOS) + decode_one_slice(currSlice); + + // 
setMB-Nr in case this slice was lost + // if(currSlice->ei_flag) + // p_Vid->current_mb_nr = currSlice->last_mb_nr + 1; +} + + +/*! + ************************************************************************ + * \brief + * Error tracking: if current frame is lost or any reference frame of + * current frame is lost, current frame is incorrect. + ************************************************************************ + */ +static void Error_tracking(VideoParameters *p_Vid) +{ + Slice *currSlice = p_Vid->currentSlice; + int i; + + if(p_Vid->redundant_pic_cnt == 0) + { + p_Vid->Is_primary_correct = p_Vid->Is_redundant_correct = 1; + } + + if(p_Vid->redundant_pic_cnt == 0 && p_Vid->type != I_SLICE) + { + for(i=0;i<currSlice->num_ref_idx_l0_active;++i) + { + if(p_Vid->ref_flag[i] == 0) // any reference of primary slice is incorrect + { + p_Vid->Is_primary_correct = 0; // primary slice is incorrect + } + } + } + else if(p_Vid->redundant_pic_cnt != 0 && p_Vid->type != I_SLICE) + { + if(p_Vid->ref_flag[p_Vid->redundant_slice_ref_idx] == 0) // reference of redundant slice is incorrect + { + p_Vid->Is_redundant_correct = 0; // redundant slice is incorrect + } + } +} + +/*! + *********************************************************************** + * \brief + * decodes one I- or P-frame + * + *********************************************************************** + */ + +int decode_one_frame(VideoParameters *p_Vid, uint64_t time_code) +{ + InputParameters *p_Inp = p_Vid->p_Inp; + + int current_header; + int i; + + while (1) + { + Slice *currSlice = p_Vid->currentSlice; + currSlice->p_Vid = p_Vid; + currSlice->p_Inp = p_Inp; + + current_header = read_new_slice(p_Vid->currentSlice, time_code); + if (current_header == EOS) + return EOS; + + // error tracking of primary and redundant slices. + Error_tracking(p_Vid); + + // If primary and redundant are received and primary is correct, discard the redundant + // else, primary slice will be replaced with redundant slice. 
+ if(p_Vid->frame_num == p_Vid->previous_frame_num && p_Vid->redundant_pic_cnt !=0 + && p_Vid->Is_primary_correct !=0 && current_header != EOS) + { + continue; + } + + // update reference flags and set current p_Vid->ref_flag + if(!(p_Vid->redundant_pic_cnt != 0 && p_Vid->previous_frame_num == p_Vid->frame_num)) + { + for(i=16;i>0;i--) + { + p_Vid->ref_flag[i] = p_Vid->ref_flag[i-1]; + } + } + p_Vid->ref_flag[0] = p_Vid->redundant_pic_cnt==0 ? p_Vid->Is_primary_correct : p_Vid->Is_redundant_correct; + p_Vid->previous_frame_num = p_Vid->frame_num; + + if (current_header == EOS) + { +// exit_picture(p_Vid, &p_Vid->dec_picture); + return EOS; + } + + if((p_Vid->active_sps->chroma_format_idc==0)||(p_Vid->active_sps->chroma_format_idc==3)) + { + currSlice->linfo_cbp_intra = linfo_cbp_intra_other; + currSlice->linfo_cbp_inter = linfo_cbp_inter_other; + } + else + { + currSlice->linfo_cbp_intra = linfo_cbp_intra_normal; + currSlice->linfo_cbp_inter = linfo_cbp_inter_normal; + } + + decode_slice(currSlice, current_header); + + ++(p_Vid->current_slice_nr); + } + + exit_picture(p_Vid, &p_Vid->dec_picture); + + return (SOP); +} + + +/*! + ************************************************************************ + * \brief + * Convert file read buffer to source picture structure + * \param imgX + * Pointer to image plane + * \param buf + * Buffer for file output + * \param size_x + * horizontal image size in pixel + * \param size_y + * vertical image size in pixel + * \param symbol_size_in_bytes + * number of bytes used per pel + ************************************************************************ + */ +void buffer2img (imgpel** imgX, unsigned char* buf, int size_x, int size_y, int symbol_size_in_bytes) +{ + int i,j; + + uint16 tmp16, ui16; + unsigned long tmp32, ui32; + + if (symbol_size_in_bytes> sizeof(imgpel)) + { + error ("Source picture has higher bit depth than imgpel data type. 
\nPlease recompile with larger data type for imgpel.", 500); + } + + if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes)) + { + // imgpel == pixel_in_file == 1 byte -> simple copy + memcpy(&imgX[0][0], buf, size_x * size_y); + } + else + { + // sizeof (imgpel) > sizeof(char) + if (testEndian()) + { + // big endian + switch (symbol_size_in_bytes) + { + case 1: + { + for(j = 0; j < size_y; ++j) + for(i = 0; i < size_x; ++i) + { + imgX[j][i]= buf[i+j*size_x]; + } + break; + } + case 2: + { + for(j=0;j<size_y;++j) + for(i=0;i<size_x;++i) + { + memcpy(&tmp16, buf+((i+j*size_x)*2), 2); + ui16 = (uint16) ((tmp16 >> 8) | ((tmp16&0xFF)<<8)); + imgX[j][i] = (imgpel) ui16; + } + break; + } + case 4: + { + for(j=0;j<size_y;++j) + for(i=0;i<size_x;++i) + { + memcpy(&tmp32, buf+((i+j*size_x)*4), 4); + ui32 = ((tmp32&0xFF00)<<8) | ((tmp32&0xFF)<<24) | ((tmp32&0xFF0000)>>8) | ((tmp32&0xFF000000)>>24); + imgX[j][i] = (imgpel) ui32; + } + } + default: + { + error ("reading only from formats of 8, 16 or 32 bit allowed on big endian architecture", 500); + break; + } + } + + } + else + { + // little endian + if (symbol_size_in_bytes == 1) + { + for (j=0; j < size_y; ++j) + { + for (i=0; i < size_x; ++i) + { + imgX[j][i]=*(buf++); + } + } + } + else + { + for (j=0; j < size_y; ++j) + { + int jpos = j*size_x; + for (i=0; i < size_x; ++i) + { + imgX[j][i]=0; + memcpy(&(imgX[j][i]), buf +((i+jpos)*symbol_size_in_bytes), symbol_size_in_bytes); + } + } + } + + } + } +} + +void reorder_lists(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + if ((currSlice->slice_type != I_SLICE)&&(currSlice->slice_type != SI_SLICE)) + { + if (currSlice->ref_pic_list_reordering_flag_l0) + { + reorder_ref_pic_list(p_Vid, p_Vid->listX[0], &p_Vid->listXsize[0], + currSlice->num_ref_idx_l0_active - 1, + currSlice->reordering_of_pic_nums_idc_l0, + currSlice->abs_diff_pic_num_minus1_l0, + currSlice->long_term_pic_idx_l0); + } + if (p_Vid->no_reference_picture == 
p_Vid->listX[0][currSlice->num_ref_idx_l0_active-1]) + { + if (p_Vid->non_conforming_stream) + printf("RefPicList0[ num_ref_idx_l0_active_minus1 ] is equal to 'no reference picture'\n"); + else + error("RefPicList0[ num_ref_idx_l0_active_minus1 ] is equal to 'no reference picture', invalid bitstream",500); + } + // that's a definition + p_Vid->listXsize[0] = (char) currSlice->num_ref_idx_l0_active; + } + if (currSlice->slice_type == B_SLICE) + { + if (currSlice->ref_pic_list_reordering_flag_l1) + { + reorder_ref_pic_list(p_Vid, p_Vid->listX[1], &p_Vid->listXsize[1], + currSlice->num_ref_idx_l1_active - 1, + currSlice->reordering_of_pic_nums_idc_l1, + currSlice->abs_diff_pic_num_minus1_l1, + currSlice->long_term_pic_idx_l1); + } + if (p_Vid->no_reference_picture == p_Vid->listX[1][currSlice->num_ref_idx_l1_active-1]) + { + if (p_Vid->non_conforming_stream) + printf("RefPicList1[ num_ref_idx_l1_active_minus1 ] is equal to 'no reference picture'\n"); + else + error("RefPicList1[ num_ref_idx_l1_active_minus1 ] is equal to 'no reference picture', invalid bitstream",500); + } + // that's a definition + p_Vid->listXsize[1] = (char) currSlice->num_ref_idx_l1_active; + } + + free_ref_pic_list_reordering_buffer(currSlice); +} + + +/*! 
+ ************************************************************************ + * \brief + * initialize ref_pic_num array + ************************************************************************ + */ +void set_ref_pic_num(VideoParameters *p_Vid) +{ + int i,j; + StorablePicture *dec_picture = p_Vid->dec_picture; + int slice_id=p_Vid->current_slice_nr; + + for (i=0;i<p_Vid->listXsize[LIST_0];++i) + { + dec_picture->ref_pic_num [slice_id][LIST_0][i] = p_Vid->listX[LIST_0][i]->poc * 2 + ((p_Vid->listX[LIST_0][i]->structure==BOTTOM_FIELD)?1:0) ; + dec_picture->frm_ref_pic_num [slice_id][LIST_0][i] = p_Vid->listX[LIST_0][i]->frame_poc * 2; + dec_picture->top_ref_pic_num [slice_id][LIST_0][i] = p_Vid->listX[LIST_0][i]->top_poc * 2; + dec_picture->bottom_ref_pic_num [slice_id][LIST_0][i] = p_Vid->listX[LIST_0][i]->bottom_poc * 2 + 1; + //printf("POCS %d %d %d %d ",p_Vid->listX[LIST_0][i]->frame_poc,p_Vid->listX[LIST_0][i]->bottom_poc,p_Vid->listX[LIST_0][i]->top_poc,p_Vid->listX[LIST_0][i]->poc); + //printf("refid %d %d %d %d\n",(int) dec_picture->frm_ref_pic_num[LIST_0][i],(int) dec_picture->top_ref_pic_num[LIST_0][i],(int) dec_picture->bottom_ref_pic_num[LIST_0][i],(int) dec_picture->ref_pic_num[LIST_0][i]); + } + + for (i=0;i<p_Vid->listXsize[LIST_1];++i) + { + dec_picture->ref_pic_num [slice_id][LIST_1][i] = p_Vid->listX[LIST_1][i]->poc *2 + ((p_Vid->listX[LIST_1][i]->structure==BOTTOM_FIELD)?1:0); + dec_picture->frm_ref_pic_num [slice_id][LIST_1][i] = p_Vid->listX[LIST_1][i]->frame_poc * 2; + dec_picture->top_ref_pic_num [slice_id][LIST_1][i] = p_Vid->listX[LIST_1][i]->top_poc * 2; + dec_picture->bottom_ref_pic_num [slice_id][LIST_1][i] = p_Vid->listX[LIST_1][i]->bottom_poc * 2 + 1; + } + + if (!p_Vid->active_sps->frame_mbs_only_flag) + { + if (p_Vid->structure==FRAME) + { + for (j=2;j<6;++j) + { + for (i=0;i<p_Vid->listXsize[j];++i) + { + dec_picture->ref_pic_num [slice_id][j][i] = p_Vid->listX[j][i]->poc * 2 + ((p_Vid->listX[j][i]->structure==BOTTOM_FIELD)?1:0); + 
dec_picture->frm_ref_pic_num [slice_id][j][i] = p_Vid->listX[j][i]->frame_poc * 2 ; + dec_picture->top_ref_pic_num [slice_id][j][i] = p_Vid->listX[j][i]->top_poc * 2 ; + dec_picture->bottom_ref_pic_num [slice_id][j][i] = p_Vid->listX[j][i]->bottom_poc * 2 + 1; + } + } + } + } +} + +/*! + ************************************************************************ + * \brief + * Reads new slice from bit_stream + ************************************************************************ + */ + +// benski> time_code is user-passed data +static int read_new_slice(Slice *currSlice, uint64_t time_code) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + InputParameters *p_Inp = currSlice->p_Inp; + + NALU_t *nalu = p_Vid->nalu; + + int current_header = 0; + Bitstream *currStream; + + int slice_id_a, slice_id_b, slice_id_c; + int redundant_pic_cnt_b, redundant_pic_cnt_c; + + if (!nalu) + return EOS; + + for (;;) + { + if (0 == read_next_nalu(p_Vid, nalu)) + return EOS; + +process_nalu: + + switch (nalu->nal_unit_type) + { + case NALU_TYPE_SLICE: + case NALU_TYPE_IDR: + + if (p_Vid->recovery_point || nalu->nal_unit_type == NALU_TYPE_IDR) + { + if (p_Vid->recovery_point_found == 0) + { + if (nalu->nal_unit_type != NALU_TYPE_IDR) + { + printf("Warning: Decoding does not start with an IDR picture.\n"); + p_Vid->non_conforming_stream = 1; + } + else + p_Vid->non_conforming_stream = 0; + } + p_Vid->recovery_point_found = 1; + } + + if (p_Vid->recovery_point_found == 0) + break; + + p_Vid->idr_flag = (nalu->nal_unit_type == NALU_TYPE_IDR); + p_Vid->nal_reference_idc = nalu->nal_reference_idc; + + currSlice->dp_mode = PAR_DP_1; + currSlice->max_part_nr = 1; + currStream = currSlice->partArr[0].bitstream; + currStream->frame_bitoffset = currStream->read_len = 0; + //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->streamBuffer = &nalu->buf[1]; + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + // Some 
syntax of the Slice Header depends on the parameter set, which depends on + // the parameter set ID of the SLice header. Hence, read the pic_parameter_set_id + // of the slice header first, then setup the active parameter sets, and then read + // the rest of the slice header + FirstPartOfSliceHeader(currSlice); + UseParameterSet (currSlice, currSlice->pic_parameter_set_id); + RestOfSliceHeader (currSlice); + + fmo_init (p_Vid); + currSlice->active_sps = p_Vid->active_sps; + currSlice->active_pps = p_Vid->active_pps; + + assign_quant_params (currSlice); + + // if primary slice is replaced with redundant slice, set the correct image type + if(p_Vid->redundant_pic_cnt && p_Vid->Is_primary_correct==0 && p_Vid->Is_redundant_correct) + { + p_Vid->dec_picture->slice_type = p_Vid->type; + } + + if(is_new_picture(p_Vid->dec_picture, currSlice, p_Vid->old_slice)) + { + init_picture(p_Vid, currSlice, p_Inp, time_code); + + current_header = SOP; + //check zero_byte if it is also the first NAL unit in the access unit + CheckZeroByteVCL(p_Vid, nalu); + } + else + current_header = SOS; + + if (currSlice->slice_type == B_SLICE) + { + if( IS_INDEPENDENT(p_Vid) ) + { + int nplane = 0; + for( nplane=0; nplane<MAX_PLANE; nplane++ ) + { + if( NULL != currSlice->Co_located_JV[nplane] ) + { + free_colocated(p_Vid, currSlice->Co_located_JV[nplane]); + currSlice->Co_located_JV[nplane] = NULL; + } + currSlice->Co_located_JV[nplane] = alloc_colocated (p_Vid, p_Vid->width, p_Vid->height, p_Vid->active_sps->mb_adaptive_frame_field_flag); + } + } + else + { + if (NULL != currSlice->p_colocated) + { + free_colocated(p_Vid, currSlice->p_colocated); + currSlice->p_colocated = NULL; + } + currSlice->p_colocated = alloc_colocated (p_Vid, p_Vid->width, p_Vid->height,p_Vid->active_sps->mb_adaptive_frame_field_flag); + } + } + + + init_lists(currSlice); + reorder_lists (currSlice); + + if (p_Vid->structure==FRAME) + { + init_mbaff_lists(p_Vid); + } + + // From here on, p_Vid->active_sps, 
p_Vid->active_pps and the slice header are valid + if (currSlice->mb_aff_frame_flag) + p_Vid->current_mb_nr = currSlice->start_mb_nr << 1; + else + p_Vid->current_mb_nr = currSlice->start_mb_nr; + + if (p_Vid->active_pps->entropy_coding_mode_flag) + { + int ByteStartPosition = currStream->frame_bitoffset/8; + if (currStream->frame_bitoffset%8 != 0) + { + ++ByteStartPosition; + } + arideco_start_decoding (&currSlice->partArr[0].de_cabac, currStream->streamBuffer, ByteStartPosition, &currStream->read_len); + } + // printf ("read_new_slice: returning %s\n", current_header == SOP?"SOP":"SOS"); + p_Vid->recovery_point = 0; + return current_header; + break; + case NALU_TYPE_DPA: + // read DP_A + currSlice->dpB_NotPresent =1; + currSlice->dpC_NotPresent =1; + + p_Vid->idr_flag = FALSE; + p_Vid->nal_reference_idc = nalu->nal_reference_idc; + currSlice->dp_mode = PAR_DP_3; + currSlice->max_part_nr = 3; + currStream = currSlice->partArr[0].bitstream; + currStream->frame_bitoffset = currStream->read_len = 0; + //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->streamBuffer = &nalu->buf[1]; + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + FirstPartOfSliceHeader(currSlice); + UseParameterSet (currSlice, currSlice->pic_parameter_set_id); + RestOfSliceHeader (currSlice); + + fmo_init (p_Vid); + + if(is_new_picture(p_Vid->dec_picture, currSlice, p_Vid->old_slice)) + { + init_picture(p_Vid, currSlice, p_Inp, time_code); + current_header = SOP; + CheckZeroByteVCL(p_Vid, nalu); + } + else + current_header = SOS; + + init_lists(currSlice); + reorder_lists (currSlice); + + if (p_Vid->structure==FRAME) + { + init_mbaff_lists(p_Vid); + } + + // From here on, p_Vid->active_sps, p_Vid->active_pps and the slice header are valid + if (currSlice->mb_aff_frame_flag) + p_Vid->current_mb_nr = currSlice->start_mb_nr << 1; + else + p_Vid->current_mb_nr = currSlice->start_mb_nr; + + // Now I need to read the 
slice ID, which depends on the value of + // redundant_pic_cnt_present_flag + + slice_id_a = ue_v("NALU: DP_A slice_id", currStream); + + if (p_Vid->active_pps->entropy_coding_mode_flag) + error ("received data partition with CABAC, this is not allowed", 500); + + // continue with reading next DP + if (0 == read_next_nalu(p_Vid, nalu)) + return current_header; + + if ( NALU_TYPE_DPB == nalu->nal_unit_type) + { + // we got a DPB + currStream = currSlice->partArr[1].bitstream; + currStream->frame_bitoffset = currStream->read_len = 0; + + //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->streamBuffer = &nalu->buf[1]; + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + slice_id_b = ue_v("NALU: DP_B slice_id", currStream); + + currSlice->dpB_NotPresent = 0; + + if ((slice_id_b != slice_id_a) || (nalu->lost_packets)) + { + printf ("Waning: got a data partition B which does not match DP_A (DP loss!)\n"); + currSlice->dpB_NotPresent =1; + currSlice->dpC_NotPresent =1; + } + else + { + if (p_Vid->active_pps->redundant_pic_cnt_present_flag) + redundant_pic_cnt_b = ue_v("NALU: DP_B redudant_pic_cnt", currStream); + else + redundant_pic_cnt_b = 0; + + // we're finished with DP_B, so let's continue with next DP + if (0 == read_next_nalu(p_Vid, nalu)) + return current_header; + } + } + else + { + currSlice->dpB_NotPresent =1; + } + + // check if we got DP_C + if ( NALU_TYPE_DPC == nalu->nal_unit_type) + { + currStream = currSlice->partArr[2].bitstream; + currStream->frame_bitoffset = currStream->read_len = 0; + + //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->streamBuffer = &nalu->buf[1]; + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + currSlice->dpC_NotPresent = 0; + + slice_id_c = ue_v("NALU: DP_C slice_id", currStream); + if ((slice_id_c != slice_id_a)|| (nalu->lost_packets)) + { + printf 
("Warning: got a data partition C which does not match DP_A(DP loss!)\n"); + //currSlice->dpB_NotPresent =1; + currSlice->dpC_NotPresent =1; + } + + if (p_Vid->active_pps->redundant_pic_cnt_present_flag) + redundant_pic_cnt_c = ue_v("NALU:SLICE_C redudand_pic_cnt", currStream); + else + redundant_pic_cnt_c = 0; + } + else + { + currSlice->dpC_NotPresent =1; + } + + // check if we read anything else than the expected partitions + if ((nalu->nal_unit_type != NALU_TYPE_DPB) && (nalu->nal_unit_type != NALU_TYPE_DPC)) + { + // we have a NALI that we can't process here, so restart processing + goto process_nalu; + // yes, "goto" should not be used, but it's really the best way here before we restructure the decoding loop + // (which should be taken care of anyway) + } + + return current_header; + + break; + case NALU_TYPE_DPB: + printf ("found data partition B without matching DP A, discarding\n"); + break; + case NALU_TYPE_DPC: + printf ("found data partition C without matching DP A, discarding\n"); + break; + case NALU_TYPE_SEI: + //printf ("read_new_slice: Found NALU_TYPE_SEI, len %d\n", nalu->len); + InterpretSEIMessage(nalu->buf,nalu->len,p_Vid); + break; + case NALU_TYPE_PPS: + ProcessPPS(p_Vid, nalu); + break; + case NALU_TYPE_SPS: + ProcessSPS(p_Vid, nalu); + break; + case NALU_TYPE_AUD: +// printf ("read_new_slice: Found 'Access Unit Delimiter' NAL unit, len %d, ignored\n", nalu->len); + break; + case NALU_TYPE_EOSEQ: +// printf ("read_new_slice: Found 'End of Sequence' NAL unit, len %d, ignored\n", nalu->len); + break; + case NALU_TYPE_EOSTREAM: +// printf ("read_new_slice: Found 'End of Stream' NAL unit, len %d, ignored\n", nalu->len); + break; + case NALU_TYPE_FILL: + printf ("read_new_slice: Found NALU_TYPE_FILL, len %d\n", (int) nalu->len); + printf ("Skipping these filling bits, proceeding w/ next NALU\n"); + break; + default: + printf ("Found NALU type %d, len %d undefined, ignore NALU, moving on\n", (int) nalu->nal_unit_type, (int) nalu->len); + break; + 
} + } + + return current_header; +} + + + +/*! + ************************************************************************ + * \brief + * finish decoding of a picture, conceal errors and store it + * into the DPB + ************************************************************************ + */ +void exit_picture(VideoParameters *p_Vid, StorablePicture **dec_picture) +{ + InputParameters *p_Inp = p_Vid->p_Inp; + int ercStartMB; + int ercSegment; + frame recfr; + unsigned int i; + int structure, frame_poc, slice_type, refpic, qp, pic_num, chroma_format_idc, is_idr; + + int nplane; + + // return if the last picture has already been finished + if (*dec_picture==NULL) + { + return; + } + + recfr.p_Vid = p_Vid; + recfr.yptr = (*dec_picture)->imgY->base_address; + if ((*dec_picture)->chroma_format_idc != YUV400) + { + recfr.uptr = (*dec_picture)->imgUV[0]->base_address; + recfr.vptr = (*dec_picture)->imgUV[1]->base_address; + } + + //! this is always true at the beginning of a picture + ercStartMB = 0; + ercSegment = 0; + + //! mark the start of the first segment + if (!(*dec_picture)->mb_aff_frame_flag) + { + ercStartSegment(0, ercSegment, 0 , p_Vid->erc_errorVar); + //! generate the segments according to the macroblock map + for(i = 1; i<(*dec_picture)->PicSizeInMbs; ++i) + { + if(p_Vid->mb_data[i].ei_flag != p_Vid->mb_data[i-1].ei_flag) + { + ercStopSegment(i-1, ercSegment, 0, p_Vid->erc_errorVar); //! stop current segment + + //! mark current segment as lost or OK + if(p_Vid->mb_data[i-1].ei_flag) + ercMarkCurrSegmentLost((*dec_picture)->size_x, p_Vid->erc_errorVar); + else + ercMarkCurrSegmentOK((*dec_picture)->size_x, p_Vid->erc_errorVar); + + ++ercSegment; //! next segment + ercStartSegment(i, ercSegment, 0 , p_Vid->erc_errorVar); //! start new segment + ercStartMB = i;//! save start MB for this segment + } + } + //! 
mark end of the last segment + ercStopSegment((*dec_picture)->PicSizeInMbs-1, ercSegment, 0, p_Vid->erc_errorVar); + if(p_Vid->mb_data[i-1].ei_flag) + ercMarkCurrSegmentLost((*dec_picture)->size_x, p_Vid->erc_errorVar); + else + ercMarkCurrSegmentOK((*dec_picture)->size_x, p_Vid->erc_errorVar); + + //! call the right error concealment function depending on the frame type. + p_Vid->erc_mvperMB /= (*dec_picture)->PicSizeInMbs; + + p_Vid->erc_img = p_Vid; + if((*dec_picture)->slice_type == I_SLICE || (*dec_picture)->slice_type == SI_SLICE) // I-frame + ercConcealIntraFrame(p_Vid, &recfr, (*dec_picture)->size_x, (*dec_picture)->size_y, p_Vid->erc_errorVar); + else + ercConcealInterFrame(&recfr, p_Vid->erc_object_list, (*dec_picture)->size_x, (*dec_picture)->size_y, p_Vid->erc_errorVar, (*dec_picture)->chroma_format_idc); + } + + //deblocking for frame or field + if( IS_INDEPENDENT(p_Vid) ) + { + int colour_plane_id = p_Vid->colour_plane_id; + for( nplane=0; nplane<MAX_PLANE; ++nplane ) + { + change_plane_JV( p_Vid, nplane ); + DeblockPicture( p_Vid, *dec_picture ); + } + p_Vid->colour_plane_id = colour_plane_id; + make_frame_picture_JV(p_Vid); + } + else + { + DeblockPicture( p_Vid, *dec_picture ); + } + + if ((*dec_picture)->mb_aff_frame_flag) + MbAffPostProc(p_Vid); + + if (p_Vid->structure == FRAME) // buffer mgt. 
for frame mode + frame_postprocessing(p_Vid); + else + field_postprocessing(p_Vid); // reset all interlaced variables + + structure = (*dec_picture)->structure; + slice_type = (*dec_picture)->slice_type; + frame_poc = (*dec_picture)->frame_poc; + refpic = (*dec_picture)->used_for_reference; + qp = (*dec_picture)->qp; + pic_num = (*dec_picture)->pic_num; + is_idr = (*dec_picture)->idr_flag; + + chroma_format_idc = (*dec_picture)->chroma_format_idc; + + store_picture_in_dpb(p_Vid, *dec_picture); + *dec_picture=NULL; + + if (p_Vid->last_has_mmco_5) + { + p_Vid->pre_frame_num = 0; + } + + + if ((structure==FRAME)||structure==BOTTOM_FIELD) + { + if(slice_type == I_SLICE || slice_type == SI_SLICE || slice_type == P_SLICE || refpic) // I or P pictures + ++(p_Vid->number); + + } + + p_Vid->current_mb_nr = -4712; // impossible value for debugging, StW + p_Vid->current_slice_nr = 0; +} + +/*! + ************************************************************************ + * \brief + * write the encoding mode and motion vectors of current + * MB to the buffer of the error concealment module. + ************************************************************************ + */ + +void ercWriteMBMODEandMV(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int i, ii, jj, currMBNum = p_Vid->current_mb_nr; + StorablePicture *dec_picture = p_Vid->dec_picture; + int mbx = 4*xPosMB(currMBNum, dec_picture->size_x), mby = 4*yPosMB(currMBNum, dec_picture->size_x); + objectBuffer_t *currRegion, *pRegion; + + currRegion = p_Vid->erc_object_list + (currMBNum<<2); + + if(p_Vid->type != B_SLICE) //non-B frame + { + for (i=0; i<4; ++i) + { + pRegion = currRegion + i; + pRegion->regionMode = (currMB->mb_type ==I16MB ? REGMODE_INTRA : + currMB->b8mode[i]==IBLOCK ? REGMODE_INTRA_8x8 : + currMB->b8mode[i]==0 ? REGMODE_INTER_COPY : + currMB->b8mode[i]==1 ? 
REGMODE_INTER_PRED : REGMODE_INTER_PRED_8x8); + if (currMB->b8mode[i]==0 || currMB->b8mode[i]==IBLOCK) // INTRA OR COPY + { + pRegion->mv[0] = 0; + pRegion->mv[1] = 0; + pRegion->mv[2] = 0; + } + else + { + PicMotion *motion00; + PicMotion *motion01; + ii = mbx + (i & 0x01)*2;// + BLOCK_SIZE; + jj = mby + (i & ~1 ); + motion00 = &dec_picture->motion.motion[LIST_0][jj][ii]; + if (currMB->b8mode[i]>=5 && currMB->b8mode[i]<=7) // SMALL BLOCKS + { + motion01 = &dec_picture->motion.motion[LIST_0][jj+1][ii]; + pRegion->mv[0] = (motion00[0].mv[0] + motion00[1].mv[0] + motion01[0].mv[0] + motion01[1].mv[0] + 2)/4; + pRegion->mv[1] = (motion00[0].mv[1] + motion00[1].mv[1] + motion01[0].mv[1] + motion01[1].mv[1] + 2)/4; + } + else // 16x16, 16x8, 8x16, 8x8 + { + pRegion->mv[0] = motion00->mv[0]; + pRegion->mv[1] = motion00->mv[1]; + } + p_Vid->erc_mvperMB += abs(pRegion->mv[0]) + abs(pRegion->mv[1]); + pRegion->mv[2] = motion00->ref_idx; + } + } + } + else //B-frame + { + for (i=0; i<4; ++i) + { + ii = mbx + (i%2)*2;// + BLOCK_SIZE; + jj = mby + (i/2)*2; + pRegion = currRegion + i; + pRegion->regionMode = (currMB->mb_type ==I16MB ? REGMODE_INTRA : + currMB->b8mode[i]==IBLOCK ? REGMODE_INTRA_8x8 : REGMODE_INTER_PRED_8x8); + if (currMB->mb_type==I16MB || currMB->b8mode[i]==IBLOCK) // INTRA + { + pRegion->mv[0] = 0; + pRegion->mv[1] = 0; + pRegion->mv[2] = 0; + } + else + { + PicMotion *motion0= & dec_picture->motion.motion[LIST_0][jj][ii]; + PicMotion *motion1; + + if (motion0->ref_idx<0) + { + motion0 = & dec_picture->motion.motion[LIST_1][jj][ii]; + motion1 = & dec_picture->motion.motion[LIST_1][jj+1][ii]; + } + else + { + motion1 = & dec_picture->motion.motion[LIST_0][jj+1][ii]; + } +// int idx = (currMB->b8mode[i]==0 && currMB->b8pdir[i]==2 ? LIST_0 : currMB->b8pdir[i]==1 ? LIST_1 : LIST_0); +// int idx = currMB->b8pdir[i]==0 ? 
LIST_0 : LIST_1; + + pRegion->mv[0] = (motion0[0].mv[0] + motion0[1].mv[0] + motion1[0].mv[0] + motion1[1].mv[0] + 2)/4; + pRegion->mv[1] = (motion0[0].mv[1] + motion0[1].mv[1] + motion1[0].mv[1] + motion1[1].mv[1] + 2)/4; + p_Vid->erc_mvperMB += iabs(pRegion->mv[0]) + iabs(pRegion->mv[1]); + + pRegion->mv[2] = (motion0[0].ref_idx); +/* + if (currMB->b8pdir[i]==0 || (currMB->b8pdir[i]==2 && currMB->b8mode[i]!=0)) // forward or bidirect + { + pRegion->mv[2] = (dec_picture->motion.ref_idx[LIST_0][jj][ii]); + ///???? is it right, not only "p_Vid->fw_refFrArr[jj][ii-4]" + } + else + { + pRegion->mv[2] = (dec_picture->motion.ref_idx[LIST_1][jj][ii]); +// pRegion->mv[2] = 0; + } + */ + } + } + } +} + +/*! + ************************************************************************ + * \brief + * set defaults for old_slice + * NAL unit of a picture" + ************************************************************************ + */ +void init_old_slice(OldSliceParams *p_old_slice) +{ + p_old_slice->field_pic_flag = 0; + + p_old_slice->pps_id = INT_MAX; + + p_old_slice->frame_num = INT_MAX; + + p_old_slice->nal_ref_idc = INT_MAX; + + p_old_slice->idr_flag = FALSE; + + p_old_slice->pic_oder_cnt_lsb = UINT_MAX; + p_old_slice->delta_pic_oder_cnt_bottom = INT_MAX; + + p_old_slice->delta_pic_order_cnt[0] = INT_MAX; + p_old_slice->delta_pic_order_cnt[1] = INT_MAX; +} + +/*! 
+ ************************************************************************ + * \brief + * save slice parameters that are needed for checking of "first VCL + * NAL unit of a picture" + ************************************************************************ + */ +static void exit_slice(Slice *currSlice, OldSliceParams *p_old_slice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + + if (currSlice->slice_type == B_SLICE) + { + if( IS_INDEPENDENT(p_Vid) ) + { + int nplane; + for( nplane = 0; nplane < MAX_PLANE; ++nplane ) + { + free_colocated(p_Vid, currSlice->Co_located_JV[nplane]); + currSlice->Co_located_JV[nplane] = NULL; + } + } + else + { + free_colocated(p_Vid, currSlice->p_colocated); + currSlice->p_colocated = NULL; + } + } + + p_old_slice->pps_id = currSlice->pic_parameter_set_id; + + p_old_slice->frame_num = p_Vid->frame_num; + + p_old_slice->field_pic_flag = p_Vid->field_pic_flag; + + if(p_Vid->field_pic_flag) + { + p_old_slice->bottom_field_flag = p_Vid->bottom_field_flag; + } + + p_old_slice->nal_ref_idc = p_Vid->nal_reference_idc; + + p_old_slice->idr_flag = (byte) p_Vid->idr_flag; + if (p_Vid->idr_flag) + { + p_old_slice->idr_pic_id = p_Vid->idr_pic_id; + } + + if (p_Vid->active_sps->pic_order_cnt_type == 0) + { + p_old_slice->pic_oder_cnt_lsb = p_Vid->pic_order_cnt_lsb; + p_old_slice->delta_pic_oder_cnt_bottom = p_Vid->delta_pic_order_cnt_bottom; + } + + if (p_Vid->active_sps->pic_order_cnt_type == 1) + { + p_old_slice->delta_pic_order_cnt[0] = p_Vid->delta_pic_order_cnt[0]; + p_old_slice->delta_pic_order_cnt[1] = p_Vid->delta_pic_order_cnt[1]; + } +} + +/*! 
+ ************************************************************************ + * \brief + * detect if current slice is "first VCL NAL unit of a picture" + ************************************************************************ + */ +int is_new_picture(StorablePicture *dec_picture, Slice *currSlice, OldSliceParams *p_old_slice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + + int result=0; + + result |= (NULL==dec_picture); + + result |= (p_old_slice->pps_id != currSlice->pic_parameter_set_id); + + result |= (p_old_slice->frame_num != p_Vid->frame_num); + + result |= (p_old_slice->field_pic_flag != p_Vid->field_pic_flag); + + if(p_Vid->field_pic_flag && p_old_slice->field_pic_flag) + { + result |= (p_old_slice->bottom_field_flag != p_Vid->bottom_field_flag); + } + + result |= (p_old_slice->nal_ref_idc != p_Vid->nal_reference_idc) && ((p_old_slice->nal_ref_idc == 0) || (p_Vid->nal_reference_idc == 0)); + + result |= ( p_old_slice->idr_flag != p_Vid->idr_flag); + + if (p_Vid->idr_flag && p_old_slice->idr_flag) + { + result |= (p_old_slice->idr_pic_id != p_Vid->idr_pic_id); + } + + if (p_Vid->active_sps->pic_order_cnt_type == 0) + { + result |= (p_old_slice->pic_oder_cnt_lsb != p_Vid->pic_order_cnt_lsb); + result |= (p_old_slice->delta_pic_oder_cnt_bottom != p_Vid->delta_pic_order_cnt_bottom); + } + + if (p_Vid->active_sps->pic_order_cnt_type == 1) + { + result |= (p_old_slice->delta_pic_order_cnt[0] != p_Vid->delta_pic_order_cnt[0]); + result |= (p_old_slice->delta_pic_order_cnt[1] != p_Vid->delta_pic_order_cnt[1]); + } + + return result; +} + + + +/*! + ************************************************************************ + * \brief + * Prepare field and frame buffer after frame decoding + ************************************************************************ + */ +void frame_postprocessing(VideoParameters *p_Vid) +{ +} + +/*! 
+ ************************************************************************ + * \brief + * Prepare field and frame buffer after field decoding + ************************************************************************ + */ +void field_postprocessing(VideoParameters *p_Vid) +{ + p_Vid->number /= 2; +} + + + +/*! + ************************************************************************ + * \brief + * copy StorablePicture *src -> StorablePicture *dst + * for 4:4:4 Independent mode + ************************************************************************ + */ +void copy_dec_picture_JV( VideoParameters *p_Vid, StorablePicture *dst, StorablePicture *src ) +{ + dst->top_poc = src->top_poc; + dst->bottom_poc = src->bottom_poc; + dst->frame_poc = src->frame_poc; + dst->qp = src->qp; + dst->slice_qp_delta = src->slice_qp_delta; + dst->chroma_qp_offset[0] = src->chroma_qp_offset[0]; + dst->chroma_qp_offset[1] = src->chroma_qp_offset[1]; + + dst->poc = src->poc; + + dst->slice_type = src->slice_type; + dst->used_for_reference = src->used_for_reference; + dst->idr_flag = src->idr_flag; + dst->no_output_of_prior_pics_flag = src->no_output_of_prior_pics_flag; + dst->long_term_reference_flag = src->long_term_reference_flag; + dst->adaptive_ref_pic_buffering_flag = src->adaptive_ref_pic_buffering_flag; + + dst->dec_ref_pic_marking_buffer = src->dec_ref_pic_marking_buffer; + + dst->mb_aff_frame_flag = src->mb_aff_frame_flag; + dst->PicWidthInMbs = src->PicWidthInMbs; + dst->pic_num = src->pic_num; + dst->frame_num = src->frame_num; + dst->recovery_frame = src->recovery_frame; + dst->coded_frame = src->coded_frame; + + dst->chroma_format_idc = src->chroma_format_idc; + + dst->frame_mbs_only_flag = src->frame_mbs_only_flag; + dst->frame_cropping_flag = src->frame_cropping_flag; + + dst->frame_cropping_rect_left_offset = src->frame_cropping_rect_left_offset; + dst->frame_cropping_rect_right_offset = src->frame_cropping_rect_right_offset; + dst->frame_cropping_rect_top_offset = 
src->frame_cropping_rect_top_offset; + dst->frame_cropping_rect_bottom_offset = src->frame_cropping_rect_bottom_offset; + +#if (ENABLE_OUTPUT_TONEMAPPING) + // store the necessary tone mapping sei into StorablePicture structure + dst->seiHasTone_mapping = src->seiHasTone_mapping; + + dst->seiHasTone_mapping = src->seiHasTone_mapping; + dst->tone_mapping_model_id = src->tone_mapping_model_id; + dst->tonemapped_bit_depth = src->tonemapped_bit_depth; + if( src->tone_mapping_lut ) + { + dst->tone_mapping_lut = malloc(sizeof(int)*(1 << p_Vid->seiToneMapping->coded_data_bit_depth)); + if (NULL == dst->tone_mapping_lut) + { + no_mem_exit("copy_dec_picture_JV: tone_mapping_lut"); + } + memcpy(dst->tone_mapping_lut, src->tone_mapping_lut, sizeof(imgpel)*(1<<p_Vid->seiToneMapping->coded_data_bit_depth)); + } +#endif +} + + +/*! + ************************************************************************ + * \brief + * decodes one slice + ************************************************************************ + */ +void decode_one_slice(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + Boolean end_of_slice = FALSE; + Macroblock *currMB = NULL; + p_Vid->cod_counter=-1; + + setup_slice_methods(currSlice); + + if( IS_INDEPENDENT(p_Vid) ) + { + change_plane_JV( p_Vid, p_Vid->colour_plane_id ); + } + + set_ref_pic_num(p_Vid); + + if (currSlice->slice_type == B_SLICE) + { + currSlice->compute_colocated(currSlice, currSlice->p_colocated, p_Vid->listX); + } + + //reset_ec_flags(p_Vid); + + while (end_of_slice == FALSE) // loop over macroblocks + { + +#if TRACE + fprintf(p_trace,"\n*********** POC: %i (I/P) MB: %i Slice: %i Type %d **********\n", p_Vid->ThisPOC, p_Vid->current_mb_nr, p_Vid->current_slice_nr, currSlice->slice_type); +#endif + + // Initializes the current macroblock + start_macroblock(currSlice, &currMB); + // Get the syntax elements from the NAL + currSlice->read_one_macroblock(currMB); + decode_one_macroblock(currMB, p_Vid->dec_picture); + + 
if(currSlice->mb_aff_frame_flag && p_Vid->dec_picture->motion.mb_field[p_Vid->current_mb_nr]) + { + currSlice->num_ref_idx_l0_active >>= 1; + currSlice->num_ref_idx_l1_active >>= 1; + } + + ercWriteMBMODEandMV(currMB); + + end_of_slice = exit_macroblock(currSlice, (!currSlice->mb_aff_frame_flag||p_Vid->current_mb_nr%2)); + } + + exit_slice(currSlice, p_Vid->old_slice); + //reset_ec_flags(p_Vid); +} diff --git a/Src/h264dec/ldecod/src/intra16x16_pred.c b/Src/h264dec/ldecod/src/intra16x16_pred.c new file mode 100644 index 00000000..a2b89ca2 --- /dev/null +++ b/Src/h264dec/ldecod/src/intra16x16_pred.c @@ -0,0 +1,428 @@ +/*! + ************************************************************************************* + * \file intra16x16_pred.c + * + * \brief + * Functions for intra 8x8 prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Yuri Vatis + * - Jan Muenster + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ +#include "global.h" +#include "intra16x16_pred.h" +#include "mb_access.h" +#include "image.h" +#include <emmintrin.h> +static void memset_16x16(h264_imgpel_macroblock_row_t *mb_pred, int pred) +{ + if (sse2_flag) + { + __m128i xmm_pred = _mm_set1_epi8(pred); + int i; + __m128i *xmm_macroblock = (__m128i *)mb_pred; + for (i=0;i<16;i++) + { + _mm_store_si128(xmm_macroblock++, xmm_pred); + } + } +#ifdef _M_IX86 + else + { + __m64 mmx_pred = _mm_set1_pi8(pred); + int i; + __m64 *mmx_macroblock = (__m64 *)mb_pred; + for (i=0;i<16;i++) + { + *mmx_macroblock++ = mmx_pred; + *mmx_macroblock++ = mmx_pred; + } + } +#else + else + { + int ii, jj; + for (jj = 0; jj < MB_BLOCK_SIZE; jj++) + { + for (ii = 0; ii < MB_BLOCK_SIZE; ii++) + { + mb_pred[jj][ii]=(imgpel) pred; + } + } + } + #endif +} + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 16x16 DC prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra16x16_dc_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int s0 = 0, s1 = 0, s2 = 0; + + int i; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int up_avail, left_avail, left_up_avail; + + s1=s2=0; + + p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]); + p_Vid->getNeighbourLeftLuma(currMB, &left[1]); + p_Vid->getNeighbourNPLumaNB(currMB, 2-1, &left[2]); + p_Vid->getNeighbourNPLumaNB(currMB, 3-1, &left[3]); + p_Vid->getNeighbourNPLumaNB(currMB, 4-1, &left[4]); + p_Vid->getNeighbourNPLumaNB(currMB, 5-1, &left[5]); + p_Vid->getNeighbourNPLumaNB(currMB, 6-1, &left[6]); + p_Vid->getNeighbourNPLumaNB(currMB, 7-1, &left[7]); + p_Vid->getNeighbourNPLumaNB(currMB, 8-1, &left[8]); + p_Vid->getNeighbourNPLumaNB(currMB, 9-1, &left[9]); + p_Vid->getNeighbourNPLumaNB(currMB, 10-1, &left[10]); + p_Vid->getNeighbourNPLumaNB(currMB, 11-1, &left[11]); + p_Vid->getNeighbourNPLumaNB(currMB, 12-1, &left[12]); + p_Vid->getNeighbourNPLumaNB(currMB, 13-1, &left[13]); + p_Vid->getNeighbourNPLumaNB(currMB, 14-1, &left[14]); + p_Vid->getNeighbourNPLumaNB(currMB, 15-1, &left[15]); + p_Vid->getNeighbourNPLumaNB(currMB, 16-1, &left[16]); + + p_Vid->getNeighbourUpLuma(currMB, &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? 
p_Vid->intra_block[up.mb_addr] : 0; + for (i = 1, left_avail = 1; i < 17; ++i) + left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0; + } + + if (up_avail) + { + s1 += imgY[up.pos_y][up.pos_x+0]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+1]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+2]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+3]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+4]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+5]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+6]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+7]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+8]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+9]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+10]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+11]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+12]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+13]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+14]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+15]; // sum hor pix + } + + if (left_avail) + { + s2 += imgY[left[0 + 1].pos_y][left[0 + 1].pos_x]; // sum vert pix + s2 += imgY[left[1 + 1].pos_y][left[1 + 1].pos_x]; // sum vert pix + s2 += imgY[left[2 + 1].pos_y][left[2 + 1].pos_x]; // sum vert pix + s2 += imgY[left[3 + 1].pos_y][left[3 + 1].pos_x]; // sum vert pix + s2 += imgY[left[4 + 1].pos_y][left[4 + 1].pos_x]; // sum vert pix + s2 += imgY[left[5 + 1].pos_y][left[5 + 1].pos_x]; // sum vert pix + s2 += imgY[left[6 + 1].pos_y][left[6 + 1].pos_x]; // sum vert pix + s2 += imgY[left[7 + 1].pos_y][left[7 + 1].pos_x]; // sum vert pix + s2 += imgY[left[8 + 1].pos_y][left[8 + 1].pos_x]; // sum vert pix + s2 += imgY[left[9 + 1].pos_y][left[9 + 1].pos_x]; // sum vert pix + s2 += imgY[left[10 + 1].pos_y][left[10 + 1].pos_x]; // sum vert pix + s2 += imgY[left[11 + 1].pos_y][left[11 + 1].pos_x]; // sum vert pix + s2 += imgY[left[12 + 1].pos_y][left[12 + 1].pos_x]; // sum vert pix 
+ s2 += imgY[left[13 + 1].pos_y][left[13 + 1].pos_x]; // sum vert pix + s2 += imgY[left[14 + 1].pos_y][left[14 + 1].pos_x]; // sum vert pix + s2 += imgY[left[15 + 1].pos_y][left[15 + 1].pos_x]; // sum vert pix + } + + if (up_avail && left_avail) + s0 = (s1 + s2 + 16)>>5; // no edge + else if (!up_avail && left_avail) + s0 = (s2 + 8)>>4; // upper edge + else if (up_avail && !left_avail) + s0 = (s1 + 8)>>4; // left edge + else + s0 = p_Vid->dc_pred_value_comp[pl]; // top left corner, nothing to predict from + + memset_16x16(currSlice->mb_pred[pl], s0); + + return DECODING_OK; +} + + +/*! + *********************************************************************** + * \brief + * makes and returns 16x16 vertical prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra16x16_vert_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int j; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos up; //!< pixel position p(0,-1) + + int up_avail; + + p_Vid->getNeighbourUpLuma(currMB, &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + } + else + { + up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0; + } + + if (!up_avail) + error ("invalid 16x16 intra pred Mode VERT_PRED_16",500); + + for(j=0;j<MB_BLOCK_SIZE;++j) + { + // TODO; take advantage of imgY's stride + memcpy(&currSlice->mb_pred[pl][j][0], &(imgY[up.pos_y][up.pos_x]), MB_BLOCK_SIZE * sizeof(imgpel)); + } + + return DECODING_OK; +} + + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 16x16 horizontal prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra16x16_hor_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int i,j; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + imgpel prediction; + + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int left_avail, left_up_avail; + + for (i=0;i<17;++i) + { + p_Vid->getNeighbourNXLuma(currMB, i-1, &left[i]); + } + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + for (i = 1, left_avail = 1; i < 17; ++i) + left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0; + } + + if (!left_avail) + error ("invalid 16x16 intra pred Mode HOR_PRED_16",500); + + for(j = 0; j < MB_BLOCK_SIZE; ++j) + { + prediction = imgY[left[j+1].pos_y][left[j+1].pos_x]; + for(i = 0; i < MB_BLOCK_SIZE; ++i) + currSlice->mb_pred[pl][j][i]= prediction; // store predicted 16x16 block + } + + return DECODING_OK; +} + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 16x16 horizontal prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static void planeset(h264_imgpel_macroblock_row_t *dest, int iaa, int ib, int ic) +{ + int j; + __m128i i0_7 = _mm_setr_epi16(-7,-6,-5,-4,-3,-2,-1, 0); + __m128i i8_15 = _mm_setr_epi16(1,2,3,4,5,6,7,8); + __m128i xmm_ib = _mm_set1_epi16(ib); + int j7ic = iaa + -7 * ic + 16; + i0_7 = _mm_mullo_epi16(i0_7, xmm_ib); + i8_15 = _mm_mullo_epi16(i8_15, xmm_ib); + for (j = 0;j < MB_BLOCK_SIZE; ++j) + { + __m128i xmm_j7ic = _mm_set1_epi16(j7ic); + __m128i xmm_lo = _mm_add_epi16(i0_7, xmm_j7ic); + __m128i xmm_hi = _mm_add_epi16(i8_15, xmm_j7ic); + __m128i xmm_store; + xmm_lo = _mm_srai_epi16(xmm_lo, 5); + xmm_hi = _mm_srai_epi16(xmm_hi, 5); + xmm_store = _mm_packus_epi16(xmm_lo, xmm_hi); + _mm_store_si128((__m128i *)dest[j], xmm_store); + j7ic += ic; + }// store plane prediction +} + +static inline int intra16x16_plane_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i,j; + + int ih = 0, iv = 0; + int ib,ic,iaa; + + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + imgpel *mpr_line; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int up_avail, left_avail, left_up_avail; + + p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]); + p_Vid->getNeighbourLeftLuma(currMB, &left[1]); + for (i=2;i<17; ++i) + { + p_Vid->getNeighbourNPLumaNB(currMB, i-1, &left[i]); + } + p_Vid->getNeighbourUpLuma(currMB, &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0; + for (i = 1, left_avail = 1; i < 17; ++i) + left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0; + } + + if (!up_avail || !left_up_avail || !left_avail) + error ("invalid 16x16 intra pred Mode PLANE_16",500); + + mpr_line = &imgY[up.pos_y][up.pos_x+7]; + for (i = 1; i < 8; ++i) + { + ih += i*(mpr_line[i] - mpr_line[-i]); + iv += i*(imgY[left[8+i].pos_y][left[8+i].pos_x] - imgY[left[8-i].pos_y][left[8-i].pos_x]); + } + + ih += 8*(mpr_line[8] - imgY[left[0].pos_y][left[0].pos_x]); + iv += 8*(imgY[left[16].pos_y][left[16].pos_x] - imgY[left[0].pos_y][left[0].pos_x]); + + ib=(5 * ih + 32)>>6; + ic=(5 * iv + 32)>>6; + + iaa=16 * (mpr_line[8] + imgY[left[16].pos_y][left[16].pos_x]); + if (sse2_flag) + { + planeset(currSlice->mb_pred[pl], iaa, ib, ic); + } + else + { + // TODO: MMX + for (j = 0;j < MB_BLOCK_SIZE; ++j) + { + int j7ic = iaa + (j - 7) * ic + 16; + for (i = 0;i < MB_BLOCK_SIZE; ++i) + { + currSlice->mb_pred[pl][j][i] = (imgpel) iClip1(max_imgpel_value, (((i - 7) * ib + j7ic) >> 5)); + } + }// store plane prediction + } + return DECODING_OK; +} + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 16x16 intra prediction blocks + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * SEARCH_SYNC search next sync element as errors while decoding occured + *********************************************************************** + */ +// TODO: replace with ippiPredictIntra_16x16_H264_8u_C1IR ? +int intrapred16x16(Macroblock *currMB, //!< Current Macroblock + ColorPlane pl, //!< Current colorplane (for 4:4:4) + int predmode) //!< prediction mode +{ + switch (predmode) + { + case VERT_PRED_16: // vertical prediction from block above + return (intra16x16_vert_pred(currMB, pl)); + break; + case HOR_PRED_16: // horizontal prediction from left block + return (intra16x16_hor_pred(currMB, pl)); + break; + case DC_PRED_16: // DC prediction + return (intra16x16_dc_pred(currMB, pl)); + break; + case PLANE_16:// 16 bit integer plan pred + return (intra16x16_plane_pred(currMB, pl)); + break; + default: + { // indication of fault in bitstream,exit + printf("illegal 16x16 intra prediction mode input: %d\n",predmode); + return SEARCH_SYNC; + } + } +} + diff --git a/Src/h264dec/ldecod/src/intra4x4_pred.c b/Src/h264dec/ldecod/src/intra4x4_pred.c new file mode 100644 index 00000000..21f16ed6 --- /dev/null +++ b/Src/h264dec/ldecod/src/intra4x4_pred.c @@ -0,0 +1,854 @@ +/*! + ************************************************************************************* + * \file intra4x4_pred.c + * + * \brief + * Functions for intra 4x4 prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ +#include "global.h" +#include "intra4x4_pred.h" +#include "mb_access.h" +#include "image.h" + +// Notation for comments regarding prediction and predictors. 
+// The pels of the 4x4 block are labelled a..p. The predictor pels above +// are labelled A..H, from the left I..L, and from above left X, as follows: +// +// X A B C D E F G H +// I a b c d +// J e f g h +// K i j k l +// L m n o p +// + +// Predictor array index definitions +#define P_X (PredPel[0]) +#define P_A (PredPel[1]) +#define P_B (PredPel[2]) +#define P_C (PredPel[3]) +#define P_D (PredPel[4]) +#define P_E (PredPel[5]) +#define P_F (PredPel[6]) +#define P_G (PredPel[7]) +#define P_H (PredPel[8]) +#define P_I (PredPel[9]) +#define P_J (PredPel[10]) +#define P_K (PredPel[11]) +#define P_L (PredPel[12]) + +static void memset_4x4(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, int pred) +{ +#ifdef _M_IX86 + // benski> can't believe the shitty code that the compiler generated... this code is better + int dword_pred = pred * 0x01010101; + mb_pred = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x]; + *(int *)mb_pred[0] = dword_pred; + *(int *)mb_pred[1] = dword_pred; + *(int *)mb_pred[2] = dword_pred; + *(int *)mb_pred[3] = dword_pred; +#else + int ii, jj; + for (jj = 0; jj < BLOCK_SIZE; jj++) + { + for (ii = 0; ii < BLOCK_SIZE; ii++) + { + mb_pred[jj][offset_x+ii]=(imgpel) pred; + } + } +#endif +} + +/*! + *********************************************************************** + * \brief + * makes and returns 4x4 DC prediction mode + * + * \param currMB + * current MB structure + * \param pl + * color plane + * \param ioff + * pixel offset X within MB + * \param joff + * pixel offset Y within MB + * \return + * DECODING_OK decoding of intra prediction mode was successful \n + * + *********************************************************************** + */ +static inline int intra4x4_dc_pred(Macroblock *currMB, + ColorPlane pl, + int ioff, + int joff) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i; + int s0 = 0; + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_a[4], pix_b; + + int block_available_up; + int block_available_left; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +0 , &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +1 , &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +2 , &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +3 , &pix_a[3]); + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<4;++i) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + } + // TODO: ippiPredictIntra_4x4_H264_8u_C1IR + // form predictor pels + if (block_available_up) + { + s0 += imgY[pix_b.pos_y][pix_b.pos_x + 0]; + s0 += imgY[pix_b.pos_y][pix_b.pos_x + 1]; + s0 += imgY[pix_b.pos_y][pix_b.pos_x + 2]; + s0 += imgY[pix_b.pos_y][pix_b.pos_x + 3]; + } + + if (block_available_left) + { + s0 += imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + s0 += imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + s0 += imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + s0 += imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + } + + if (block_available_up && block_available_left) + { + // no edge + s0 = (s0 + 4)>>3; + } + else if (!block_available_up && block_available_left) + { + // upper edge + s0 = (s0 + 2)>>2; + } + else if (block_available_up && !block_available_left) + { + // left edge + s0 = (s0 + 2)>>2; + } + else //if (!block_available_up && !block_available_left) + { + // top left corner, nothing to predict from + s0 = p_Vid->dc_pred_value_comp[pl]; + } + + memset_4x4(&currSlice->mb_pred[pl][joff], ioff, s0); + return DECODING_OK; +} + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 4x4 vertical prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra4x4_vert_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + imgpel *src; + PixelPos pix_b; + + p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1 , &pix_b); +#ifdef H264_WARNINGS + { + int block_available_up; + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + } + else + { + block_available_up = pix_b.available; + } + + if (!block_available_up) + printf ("warning: Intra_4x4_Vertical prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + } +#endif + src = &(imgY[pix_b.pos_y][pix_b.pos_x]); + /* store predicted 4x4 block */ + memcpy(&(currSlice->mb_pred[pl][joff][ioff]), src, BLOCK_SIZE * sizeof(imgpel)); + memcpy(&(currSlice->mb_pred[pl][joff+1][ioff]), src, BLOCK_SIZE * sizeof(imgpel)); + memcpy(&(currSlice->mb_pred[pl][joff+2][ioff]), src, BLOCK_SIZE * sizeof(imgpel)); + memcpy(&(currSlice->mb_pred[pl][joff+3][ioff]), src, BLOCK_SIZE * sizeof(imgpel)); + + return DECODING_OK; +} + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 4x4 horizontal prediction mode + * + * \param currMB + * current MB structure + * \param pl + * color plane + * \param ioff + * pixel offset X within MB + * \param joff + * pixel offset Y within MB + * + * \return + * DECODING_OK decoding of intra prediction mode was successful + * + *********************************************************************** + */ +static inline int intra4x4_hor_pred(Macroblock *currMB, + ColorPlane pl, + int ioff, + int joff) +{ + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + + int j; + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_a[4]; + + imgpel prediction; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 0 , &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 1 , &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 2 , &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 3 , &pix_a[3]); + +#ifdef H264_WARNINGS + { + int i; + int block_available_left; + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<4;++i) + block_available_left &= pix_a[i].available ? 
p_Vid->intra_block[pix_a[i].mb_addr]: 0; + } + else + { + block_available_left = pix_a[0].available; + } + + if (!block_available_left) + printf ("warning: Intra_4x4_Horizontal prediction mode not allowed at mb %d\n",(int) p_Vid->current_mb_nr); + } +#endif + for(j=0;j<BLOCK_SIZE;++j) + { + // TODO: write using imgY's stride + prediction = imgY[pix_a[j].pos_y][pix_a[j].pos_x]; + //for(i = ioff;i < ioff + BLOCK_SIZE;++i) + currSlice->mb_pred[pl][j+joff][ioff]= prediction; /* store predicted 4x4 block */ + currSlice->mb_pred[pl][j+joff][ioff+1]= prediction; /* store predicted 4x4 block */ + currSlice->mb_pred[pl][j+joff][ioff+2]= prediction; /* store predicted 4x4 block */ + currSlice->mb_pred[pl][j+joff][ioff+3]= prediction; /* store predicted 4x4 block */ + } + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 4x4 diagonal down right prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra4x4_diag_down_right_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + + imgpel PredPel[13]; // array of predictor pels + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_a[4]; + PixelPos pix_b, pix_d; + + + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 0 , &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 1 , &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 2 , &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + 3 , &pix_a[3]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b); + p_Vid->getNeighbourLuma(currMB, ioff -1 , joff -1 , &pix_d); +#ifdef H264_WARNINGS + { + int block_available_up; + int block_available_left; + int block_available_up_left; + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + int i; + for (i=0, block_available_left=1; i<4;++i) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_left = pix_d.available; + } + + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_4x4_Diagonal_Down_Right prediction mode not allowed at mb %d\n",(int) p_Vid->current_mb_nr); +} +#endif + // form predictor pels + P_A = imgY[pix_b.pos_y][pix_b.pos_x + 0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x + 1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x + 2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x + 3]; + + P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + + P_X = imgY[pix_d.pos_y][pix_d.pos_x]; + + currSlice->mb_pred[pl][joff+3][ioff+0] = (imgpel) ((P_L + 2*P_K + P_J + 2) >> 2); + currSlice->mb_pred[pl][joff+2][ioff+0] = + currSlice->mb_pred[pl][joff+3][ioff+1] = (imgpel) ((P_K 
+ 2*P_J + P_I + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+0] = + currSlice->mb_pred[pl][joff+2][ioff+1] = + currSlice->mb_pred[pl][joff+3][ioff+2] = (imgpel) ((P_J + 2*P_I + P_X + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+0] = + currSlice->mb_pred[pl][joff+1][ioff+1] = + currSlice->mb_pred[pl][joff+2][ioff+2] = + currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+1] = + currSlice->mb_pred[pl][joff+1][ioff+2] = + currSlice->mb_pred[pl][joff+2][ioff+3] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+2] = + currSlice->mb_pred[pl][joff+1][ioff+3] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+3] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2); + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 4x4 diagonal down left prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra4x4_diag_down_left_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int block_available_up_right; + #ifdef H264_WARNINGS + int block_available_up; +#endif + imgpel PredPel[13]; // array of predictor pels + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_b, pix_c; + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourLuma(currMB, ioff + 4, joff - 1, &pix_c); + + pix_c.available = pix_c.available && !((ioff==4) && ((joff==4)||(joff==12))); + + + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + #ifdef H264_WARNINGS + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; +#endif + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + } + else + { + #ifdef H264_WARNINGS + block_available_up = pix_b.available; +#endif + block_available_up_right = pix_c.available; + } +#ifdef H264_WARNINGS + if (!block_available_up) + printf ("warning: Intra_4x4_Diagonal_Down_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + +#endif + // form predictor pels + P_A = imgY[pix_b.pos_y][pix_b.pos_x + 0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x + 1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x + 2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x + 3]; + + if (block_available_up_right) + { + P_E = imgY[pix_c.pos_y][pix_c.pos_x + 0]; + P_F = imgY[pix_c.pos_y][pix_c.pos_x + 1]; + P_G = imgY[pix_c.pos_y][pix_c.pos_x + 2]; + P_H = imgY[pix_c.pos_y][pix_c.pos_x + 3]; + } + else + { + P_E = P_F = P_G = P_H = P_D; + } + + currSlice->mb_pred[pl][joff+0][ioff+0] = (imgpel) ((P_A + P_C + 2*(P_B) + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+1] = + currSlice->mb_pred[pl][joff+1][ioff+0] = (imgpel) ((P_B + P_D + 2*(P_C) + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+2] = + currSlice->mb_pred[pl][joff+1][ioff+1] = + currSlice->mb_pred[pl][joff+2][ioff+0] = (imgpel) ((P_C + P_E + 2*(P_D) + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+3] = + currSlice->mb_pred[pl][joff+1][ioff+2] = + currSlice->mb_pred[pl][joff+2][ioff+1] = + currSlice->mb_pred[pl][joff+3][ioff+0] = (imgpel) ((P_D + P_F + 2*(P_E) + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+3] = 
+ currSlice->mb_pred[pl][joff+2][ioff+2] = + currSlice->mb_pred[pl][joff+3][ioff+1] = (imgpel) ((P_E + P_G + 2*(P_F) + 2) >> 2); + currSlice->mb_pred[pl][joff+2][ioff+3] = + currSlice->mb_pred[pl][joff+3][ioff+2] = (imgpel) ((P_F + P_H + 2*(P_G) + 2) >> 2); + currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) ((P_G + 3*(P_H) + 2) >> 2); + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 4x4 vertical right prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra4x4_vert_right_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + + imgpel PredPel[13]; // array of predictor pels + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_a[4]; + PixelPos pix_b, pix_d; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +0 , &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +1 , &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +2 , &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +3 , &pix_a[3]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b); + p_Vid->getNeighbourLuma(currMB, ioff -1 , joff -1 , &pix_d); +#ifdef H264_WARNINGS + { + int i; + int block_available_up; + int block_available_left; + int block_available_up_left; + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<4;++i) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? 
p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_left = pix_d.available; + } + + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_4x4_Vertical_Right prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); +} +#endif + // form predictor pels + P_A = imgY[pix_b.pos_y][pix_b.pos_x + 0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x + 1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x + 2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x + 3]; + + P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + + P_X = imgY[pix_d.pos_y][pix_d.pos_x]; + + currSlice->mb_pred[pl][joff+0][ioff+0] = + currSlice->mb_pred[pl][joff+2][ioff+1] = (imgpel) ((P_X + P_A + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+1] = + currSlice->mb_pred[pl][joff+2][ioff+2] = (imgpel) ((P_A + P_B + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+2] = + currSlice->mb_pred[pl][joff+2][ioff+3] = (imgpel) ((P_B + P_C + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+3] = (imgpel) ((P_C + P_D + 1) >> 1); + currSlice->mb_pred[pl][joff+1][ioff+0] = + currSlice->mb_pred[pl][joff+3][ioff+1] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+1] = + currSlice->mb_pred[pl][joff+3][ioff+2] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+2] = + currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+3] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2); + currSlice->mb_pred[pl][joff+2][ioff+0] = (imgpel) ((P_X + 2*P_I + P_J + 2) >> 2); + currSlice->mb_pred[pl][joff+3][ioff+0] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2); + + return 
DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 4x4 vertical left prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra4x4_vert_left_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + imgpel PredPel[13]; // array of predictor pels + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_b, pix_c; + + int block_available_up; + int block_available_up_right; + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b); + p_Vid->getNeighbourLuma(currMB, ioff +4 , joff -1 , &pix_c); + + pix_c.available = pix_c.available && !((ioff==4) && ((joff==4)||(joff==12))); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? 
p_Vid->intra_block [pix_c.mb_addr] : 0; + } + else + { + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + } + + + if (!block_available_up) + printf ("warning: Intra_4x4_Vertical_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + P_A = imgY[pix_b.pos_y][pix_b.pos_x + 0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x + 1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x + 2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x + 3]; + + if (block_available_up_right) + { + P_E = imgY[pix_c.pos_y][pix_c.pos_x + 0]; + P_F = imgY[pix_c.pos_y][pix_c.pos_x + 1]; + P_G = imgY[pix_c.pos_y][pix_c.pos_x + 2]; + P_H = imgY[pix_c.pos_y][pix_c.pos_x + 3]; + } + else + { + P_E = P_F = P_G = P_H = P_D; + } + + currSlice->mb_pred[pl][joff+0][ioff+0] = (imgpel) ((P_A + P_B + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+1] = + currSlice->mb_pred[pl][joff+2][ioff+0] = (imgpel) ((P_B + P_C + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+2] = + currSlice->mb_pred[pl][joff+2][ioff+1] = (imgpel) ((P_C + P_D + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+3] = + currSlice->mb_pred[pl][joff+2][ioff+2] = (imgpel) ((P_D + P_E + 1) >> 1); + currSlice->mb_pred[pl][joff+2][ioff+3] = (imgpel) ((P_E + P_F + 1) >> 1); + currSlice->mb_pred[pl][joff+1][ioff+0] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+1] = + currSlice->mb_pred[pl][joff+3][ioff+0] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+2] = + currSlice->mb_pred[pl][joff+3][ioff+1] = (imgpel) ((P_C + 2*P_D + P_E + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+3] = + currSlice->mb_pred[pl][joff+3][ioff+2] = (imgpel) ((P_D + 2*P_E + P_F + 2) >> 2); + currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) ((P_E + 2*P_F + P_G + 2) >> 2); + + return DECODING_OK; +} + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 4x4 horizontal up prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra4x4_hor_up_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + imgpel PredPel[13]; // array of predictor pels + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_a[4]; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +0 , &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +1 , &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +2 , &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +3 , &pix_a[3]); + +#ifdef H264_WARNINGS + { + int i; + int block_available_left; + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<4;++i) + block_available_left &= pix_a[i].available ? 
p_Vid->intra_block[pix_a[i].mb_addr]: 0; + } + else + { + block_available_left = pix_a[0].available; + } + + if (!block_available_left) + printf ("warning: Intra_4x4_Horizontal_Up prediction mode not allowed at mb %d\n",(int) p_Vid->current_mb_nr); + } +#endif + // form predictor pels + P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + + currSlice->mb_pred[pl][joff+0][ioff+0] = (imgpel) ((P_I + P_J + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+1] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+2] = + currSlice->mb_pred[pl][joff+1][ioff+0] = (imgpel) ((P_J + P_K + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+3] = + currSlice->mb_pred[pl][joff+1][ioff+1] = (imgpel) ((P_J + 2*P_K + P_L + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+2] = + currSlice->mb_pred[pl][joff+2][ioff+0] = (imgpel) ((P_K + P_L + 1) >> 1); + currSlice->mb_pred[pl][joff+1][ioff+3] = + currSlice->mb_pred[pl][joff+2][ioff+1] = (imgpel) ((P_K + 2*P_L + P_L + 2) >> 2); + currSlice->mb_pred[pl][joff+2][ioff+3] = + currSlice->mb_pred[pl][joff+3][ioff+1] = + currSlice->mb_pred[pl][joff+3][ioff+0] = + currSlice->mb_pred[pl][joff+2][ioff+2] = + currSlice->mb_pred[pl][joff+3][ioff+2] = + currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) P_L; + + return DECODING_OK; +} + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 4x4 horizontal down prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra4x4_hor_down_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i; + imgpel PredPel[13]; // array of predictor pels + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos pix_a[4]; + PixelPos pix_b, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +0 , &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +1 , &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +2 , &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +3 , &pix_a[3]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b); + p_Vid->getNeighbourLuma(currMB, ioff -1 , joff -1 , &pix_d); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<4;++i) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_left = pix_d.available; + } + + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_4x4_Horizontal_Down prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + P_A = imgY[pix_b.pos_y][pix_b.pos_x + 0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x + 1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x + 2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x + 3]; + + P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + + P_X = imgY[pix_d.pos_y][pix_d.pos_x]; + + currSlice->mb_pred[pl][joff+0][ioff+0] = + currSlice->mb_pred[pl][joff+1][ioff+2] = (imgpel) ((P_X + P_I + 1) >> 1); + currSlice->mb_pred[pl][joff+0][ioff+1] = + currSlice->mb_pred[pl][joff+1][ioff+3] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+2] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2); + currSlice->mb_pred[pl][joff+0][ioff+3] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2); + currSlice->mb_pred[pl][joff+1][ioff+0] = + currSlice->mb_pred[pl][joff+2][ioff+2] = (imgpel) ((P_I + P_J + 1) >> 1); + currSlice->mb_pred[pl][joff+1][ioff+1] = + currSlice->mb_pred[pl][joff+2][ioff+3] = (imgpel) ((P_X + 2*P_I + P_J + 2) >> 2); + currSlice->mb_pred[pl][joff+2][ioff+0] = + currSlice->mb_pred[pl][joff+3][ioff+2] = (imgpel) ((P_J + P_K + 1) >> 1); + currSlice->mb_pred[pl][joff+2][ioff+1] = + currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2); + currSlice->mb_pred[pl][joff+3][ioff+0] = (imgpel) ((P_K + P_L + 1) >> 1); + currSlice->mb_pred[pl][joff+3][ioff+1] = (imgpel) ((P_J + 2*P_K + P_L + 2) >> 2); + + return DECODING_OK; +} + + +/*! 
+ *********************************************************************** + * \brief + * makes and returns 4x4 intra prediction blocks + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * SEARCH_SYNC search next sync element as errors while decoding occured + *********************************************************************** + */ +int intrapred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff, //!< pixel offset Y within MB + int img_block_x, //!< location of block X, multiples of 4 + int img_block_y) //!< location of block Y, multiples of 4 +{ + VideoParameters *p_Vid = currMB->p_Vid; + byte predmode = p_Vid->ipredmode[img_block_y][img_block_x]; + currMB->ipmode_DPCM = predmode; //For residual DPCM + + switch (predmode) + { + case DC_PRED: + return (intra4x4_dc_pred(currMB, pl, ioff, joff)); + break; + case VERT_PRED: + return (intra4x4_vert_pred(currMB, pl, ioff, joff)); + break; + case HOR_PRED: + return (intra4x4_hor_pred(currMB, pl, ioff, joff)); + break; + case DIAG_DOWN_RIGHT_PRED: + return (intra4x4_diag_down_right_pred(currMB, pl, ioff, joff)); + break; + case DIAG_DOWN_LEFT_PRED: + return (intra4x4_diag_down_left_pred(currMB, pl, ioff, joff)); + break; + case VERT_RIGHT_PRED: + return (intra4x4_vert_right_pred(currMB, pl, ioff, joff)); + break; + case VERT_LEFT_PRED: + return (intra4x4_vert_left_pred(currMB, pl, ioff, joff)); + break; + case HOR_UP_PRED: + return (intra4x4_hor_up_pred(currMB, pl, ioff, joff)); + break; + case HOR_DOWN_PRED: + return (intra4x4_hor_down_pred(currMB, pl, ioff, joff)); + default: + printf("Error: illegal intra_4x4 prediction mode: %d\n", (int) predmode); + return SEARCH_SYNC; + break; + } +} diff --git a/Src/h264dec/ldecod/src/intra8x8_pred.c b/Src/h264dec/ldecod/src/intra8x8_pred.c new file mode 100644 index 00000000..217e0ea8 --- /dev/null +++ b/Src/h264dec/ldecod/src/intra8x8_pred.c @@ -0,0 
+1,1928 @@ +/*! + ************************************************************************************* + * \file intra8x8_pred.c + * + * \brief + * Functions for intra 8x8 prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Yuri Vatis + * - Jan Muenster + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ +#include "global.h" +#include "intra8x8_pred.h" +#include "mb_access.h" +#include "image.h" + +// Notation for comments regarding prediction and predictors. +// The pels of the 8x8 block are labeled a..p. The predictor pels above +// are labeled A..H, from the left I..P, and from above left X, as follows: +// +// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 +// 17 a1 b1 c1 d1 e1 f1 g1 h1 +// 18 a2 b2 c2 d2 e2 f2 g2 h2 +// 19 a3 b3 c3 d3 e3 f3 g3 h3 +// 20 a4 b4 c4 d4 e4 f4 g4 h4 +// 21 a5 b5 c5 d5 e5 f5 g5 h5 +// 22 a6 b6 c6 d6 e6 f6 g6 h6 +// 23 a7 b7 c7 d7 e7 f7 g7 h7 +// 24 a8 b8 c8 d8 e8 f8 g8 h8 + + +static void memset_8x8(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, int pred) +{ +#ifdef _M_IX86 + // benski> can't believe the shitty code that the compiler generated... 
this code is better + __m64 mmx_pred = _mm_set1_pi8(pred); + mb_pred = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x]; + *(__m64 *)mb_pred[0] = mmx_pred; + *(__m64 *)mb_pred[1] = mmx_pred; + *(__m64 *)mb_pred[2] = mmx_pred; + *(__m64 *)mb_pred[3] = mmx_pred; + *(__m64 *)mb_pred[4] = mmx_pred; + *(__m64 *)mb_pred[5] = mmx_pred; + *(__m64 *)mb_pred[6] = mmx_pred; + *(__m64 *)mb_pred[7] = mmx_pred; +#else + int ii, jj; + for (jj = 0; jj < BLOCK_SIZE_8x8; jj++) + { + for (ii = 0; ii < BLOCK_SIZE_8x8; ii++) + { + mb_pred[jj][offset_x+ii]=(imgpel) pred; + } + } +#endif +} + +static void memset_8x8_row(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, const imgpel row[8]) +{ +#ifdef _M_IX86 + // benski> can't believe the shitty code that the compiler generated... this code is better + __m64 mmx_pred = *(__m64 *)row; + mb_pred = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x]; + *(__m64 *)mb_pred[0] = mmx_pred; + *(__m64 *)mb_pred[1] = mmx_pred; + *(__m64 *)mb_pred[2] = mmx_pred; + *(__m64 *)mb_pred[3] = mmx_pred; + *(__m64 *)mb_pred[4] = mmx_pred; + *(__m64 *)mb_pred[5] = mmx_pred; + *(__m64 *)mb_pred[6] = mmx_pred; + *(__m64 *)mb_pred[7] = mmx_pred; +#else + int jj; + for (jj = 0; jj < BLOCK_SIZE_8x8; jj++) + { + memcpy(&mb_pred[jj][offset_x], row, 8); + } +#endif +} + +/*! 
+ ************************************************************************************* + * \brief + * Prefiltering for Intra8x8 prediction + ************************************************************************************* + */ +static __forceinline void LowPassForIntra8x8Pred(imgpel *PredPel, int block_up_left, int block_up, int block_left) +{ + imgpel LoopArray[25]; + + memcpy(&LoopArray[0], &PredPel[0], 25 * sizeof(imgpel)); + + if(block_up_left) + { + if(block_up && block_left) + { + PredPel[0] = (imgpel) ((LoopArray[17] + (LoopArray[0]<<1) + LoopArray[1] + 2)>>2); + } + else + { + if(block_up) + PredPel[0] = (imgpel) ((LoopArray[0] + (LoopArray[0]<<1) + LoopArray[1] + 2)>>2); + else if (block_left) + PredPel[0] = (imgpel) ((LoopArray[0] + (LoopArray[0]<<1) + LoopArray[17] + 2)>>2); + } + } + + if(block_up) + { + if(block_up_left) + { + PredPel[1] = (imgpel) ((LoopArray[0] + (LoopArray[1]<<1) + LoopArray[2] + 2)>>2); + } + else + PredPel[1] = (imgpel) ((LoopArray[1] + (LoopArray[1]<<1) + LoopArray[2] + 2)>>2); + + + PredPel[2] = (imgpel) ((LoopArray[2-1] + (LoopArray[2]<<1) + LoopArray[2+1] + 2)>>2); + PredPel[3] = (imgpel) ((LoopArray[3-1] + (LoopArray[3]<<1) + LoopArray[3+1] + 2)>>2); + PredPel[4] = (imgpel) ((LoopArray[4-1] + (LoopArray[4]<<1) + LoopArray[4+1] + 2)>>2); + PredPel[5] = (imgpel) ((LoopArray[5-1] + (LoopArray[5]<<1) + LoopArray[5+1] + 2)>>2); + PredPel[6] = (imgpel) ((LoopArray[6-1] + (LoopArray[6]<<1) + LoopArray[6+1] + 2)>>2); + PredPel[7] = (imgpel) ((LoopArray[7-1] + (LoopArray[7]<<1) + LoopArray[7+1] + 2)>>2); + PredPel[8] = (imgpel) ((LoopArray[8-1] + (LoopArray[8]<<1) + LoopArray[8+1] + 2)>>2); + PredPel[9] = (imgpel) ((LoopArray[9-1] + (LoopArray[9]<<1) + LoopArray[9+1] + 2)>>2); + PredPel[10] = (imgpel) ((LoopArray[10-1] + (LoopArray[10]<<1) + LoopArray[10+1] + 2)>>2); + PredPel[11] = (imgpel) ((LoopArray[11-1] + (LoopArray[11]<<1) + LoopArray[11+1] + 2)>>2); + PredPel[12] = (imgpel) ((LoopArray[12-1] + (LoopArray[12]<<1) + 
LoopArray[12+1] + 2)>>2); + PredPel[13] = (imgpel) ((LoopArray[13-1] + (LoopArray[13]<<1) + LoopArray[13+1] + 2)>>2); + PredPel[14] = (imgpel) ((LoopArray[14-1] + (LoopArray[14]<<1) + LoopArray[14+1] + 2)>>2); + PredPel[15] = (imgpel) ((LoopArray[15-1] + (LoopArray[15]<<1) + LoopArray[15+1] + 2)>>2); + + PredPel[16] = (imgpel) ((LoopArray[16] + (LoopArray[16]<<1) + LoopArray[15] + 2)>>2); + } + + if(block_left) + { + if(block_up_left) + PredPel[17] = (imgpel) ((LoopArray[0] + (LoopArray[17]<<1) + LoopArray[18] + 2)>>2); + else + PredPel[17] = (imgpel) ((LoopArray[17] + (LoopArray[17]<<1) + LoopArray[18] + 2)>>2); + + PredPel[18] = (imgpel) ((LoopArray[18-1] + (LoopArray[18]<<1) + LoopArray[18+1] + 2)>>2); + PredPel[19] = (imgpel) ((LoopArray[19-1] + (LoopArray[19]<<1) + LoopArray[19+1] + 2)>>2); + PredPel[20] = (imgpel) ((LoopArray[20-1] + (LoopArray[20]<<1) + LoopArray[20+1] + 2)>>2); + PredPel[21] = (imgpel) ((LoopArray[21-1] + (LoopArray[21]<<1) + LoopArray[21+1] + 2)>>2); + PredPel[22] = (imgpel) ((LoopArray[22-1] + (LoopArray[22]<<1) + LoopArray[22+1] + 2)>>2); + PredPel[23] = (imgpel) ((LoopArray[23-1] + (LoopArray[23]<<1) + LoopArray[23+1] + 2)>>2); + + PredPel[24] = (imgpel) ((LoopArray[23] + (LoopArray[24]<<1) + LoopArray[24] + 2) >> 2); + } + + //memcpy(&PredPel[0], &LoopArray[0], 25 * sizeof(imgpel)); +} + +/*! 
 *************************************************************************************
 * \brief
 *    Prefiltering for Intra8x8 prediction (Horizontal)
 *
 *    Applies the 3-tap (1,2,1)/4 smoothing filter in place to the up-left
 *    pel PredPel[0] and to the row of pels above, PredPel[1..16].
 *    PredPel[17] is only read (as a tap for PredPel[0]); PredPel[18..24]
 *    are not accessed.
 *
 * \param PredPel        predictor pels, filtered in place
 * \param block_up_left  non-zero if the up-left pel takes part in the filtering
 * \param block_up       non-zero if the row above is filtered
 * \param block_left     non-zero if PredPel[17] may be used as a tap for PredPel[0]
 *************************************************************************************
 */
static __forceinline void LowPassForIntra8x8PredHor(imgpel *PredPel, int block_up_left, int block_up, int block_left)
{
  // unfiltered copy of PredPel[0..15]; indices 16 and 17 are read directly
  // from PredPel below, before anything has overwritten them
  imgpel LoopArray[16];

  memcpy(&LoopArray[0], &PredPel[0], 16 * sizeof(imgpel));

  if(block_up_left)
  {
    if(block_up && block_left)
    {
      // NOTE(review): reads PredPel[17]; the caller must have filled it
      // whenever block_left is set -- confirm for every caller.
      PredPel[0] = (imgpel) ((PredPel[17] + (LoopArray[0]<<1) + LoopArray[1] + 2)>>2);
    }
    else
    {
      // only one neighbour exists: the corner itself stands in for the missing tap
      if(block_up)
        PredPel[0] = (imgpel) ((LoopArray[0] + (LoopArray[0]<<1) + LoopArray[1] + 2)>>2);
      else if (block_left)
        PredPel[0] = (imgpel) ((LoopArray[0] + (LoopArray[0]<<1) + PredPel[17] + 2)>>2);
    }
  }

  if(block_up)
  {
    // first pel of the row: left tap is the corner if it exists, else the pel itself
    if(block_up_left)
    {
      PredPel[1] = (imgpel) ((LoopArray[0] + (LoopArray[1]<<1) + LoopArray[2] + 2)>>2);
    }
    else
      PredPel[1] = (imgpel) ((LoopArray[1] + (LoopArray[1]<<1) + LoopArray[2] + 2)>>2);


    // interior pels: (left + 2*centre + right + 2) >> 2, taps taken from the unfiltered copy
    PredPel[2] = (imgpel) ((LoopArray[2-1] + (LoopArray[2]<<1) + LoopArray[2+1] + 2)>>2);
    PredPel[3] = (imgpel) ((LoopArray[3-1] + (LoopArray[3]<<1) + LoopArray[3+1] + 2)>>2);
    PredPel[4] = (imgpel) ((LoopArray[4-1] + (LoopArray[4]<<1) + LoopArray[4+1] + 2)>>2);
    PredPel[5] = (imgpel) ((LoopArray[5-1] + (LoopArray[5]<<1) + LoopArray[5+1] + 2)>>2);
    PredPel[6] = (imgpel) ((LoopArray[6-1] + (LoopArray[6]<<1) + LoopArray[6+1] + 2)>>2);
    PredPel[7] = (imgpel) ((LoopArray[7-1] + (LoopArray[7]<<1) + LoopArray[7+1] + 2)>>2);
    PredPel[8] = (imgpel) ((LoopArray[8-1] + (LoopArray[8]<<1) + LoopArray[8+1] + 2)>>2);
    PredPel[9] = (imgpel) ((LoopArray[9-1] + (LoopArray[9]<<1) + LoopArray[9+1] + 2)>>2);
    PredPel[10] = (imgpel) ((LoopArray[10-1] + (LoopArray[10]<<1) + LoopArray[10+1] + 2)>>2);
    PredPel[11] = (imgpel) ((LoopArray[11-1] + (LoopArray[11]<<1) + LoopArray[11+1] + 2)>>2);
    PredPel[12] = (imgpel) ((LoopArray[12-1] + (LoopArray[12]<<1) + LoopArray[12+1] + 2)>>2);
    PredPel[13] = (imgpel) ((LoopArray[13-1] + (LoopArray[13]<<1) + LoopArray[13+1] + 2)>>2);
    PredPel[14] = (imgpel) ((LoopArray[14-1] + (LoopArray[14]<<1) + LoopArray[14+1] + 2)>>2);
    // PredPel[16] is still unfiltered here; it is only overwritten on the next line
    PredPel[15] = (imgpel) ((LoopArray[15-1] + (LoopArray[15]<<1) + PredPel[15+1] + 2)>>2);
    // last pel of the row has no right neighbour: the pel itself is used twice more
    PredPel[16] = (imgpel) ((PredPel[16] + (PredPel[16]<<1) + LoopArray[15] + 2)>>2);
  }


  //memcpy(&PredPel[0], &LoopArray[0], 17 * sizeof(imgpel));
}

/*!
 *************************************************************************************
 * \brief
 *    Prefiltering for Intra8x8 prediction (Vertical)
 *
 *    Applies the 3-tap (1,2,1)/4 smoothing filter in place to the up-left
 *    pel PredPel[0] and to the column of pels to the left, PredPel[17..24].
 *    PredPel[1] is only read (as a tap for PredPel[0]); PredPel[2..16]
 *    are not accessed.
 *
 * \param PredPel        predictor pels, filtered in place
 * \param block_up_left  non-zero if the up-left pel takes part in the filtering
 * \param block_up       non-zero if PredPel[1] may be used as a tap for PredPel[0]
 * \param block_left     non-zero if the left column is filtered
 *************************************************************************************
 */
static __forceinline void LowPassForIntra8x8PredVer(imgpel *PredPel, int block_up_left, int block_up, int block_left)
{
  // These functions need some cleanup and can be further optimized.
  // For convenience, let us copy all data for now. It is obvious that the filtering makes things a bit more "complex"
  int i;
  imgpel LoopArray[25];

  // copy only the entries that are read below (0, 1 and 17..24);
  // LoopArray[2..16] stay uninitialised and are never used
  //memcpy(&LoopArray[0], &PredPel[0], 25 * sizeof(imgpel));
  LoopArray[0] = PredPel[0];
  LoopArray[1] = PredPel[1];
  LoopArray[17] = PredPel[17];
  LoopArray[18] = PredPel[18];
  LoopArray[19] = PredPel[19];
  LoopArray[20] = PredPel[20];
  LoopArray[21] = PredPel[21];
  LoopArray[22] = PredPel[22];
  LoopArray[23] = PredPel[23];
  LoopArray[24] = PredPel[24];

  if(block_up_left)
  {
    if(block_up && block_left)
    {
      PredPel[0] = (imgpel) ((LoopArray[17] + (LoopArray[0]<<1) + LoopArray[1] + 2)>>2);
    }
    else
    {
      // only one neighbour exists: the corner itself stands in for the missing tap
      if(block_up)
        PredPel[0] = (imgpel) ((LoopArray[0] + (LoopArray[0]<<1) + LoopArray[1] + 2)>>2);
      else if (block_left)
        PredPel[0] = (imgpel) ((LoopArray[0] + (LoopArray[0]<<1) + LoopArray[17] + 2)>>2);
    }
  }

  if(block_left)
  {
    // first pel of the column: upper tap is the corner if it exists, else the pel itself
    if(block_up_left)
      PredPel[17] = (imgpel) ((LoopArray[0] + (LoopArray[17]<<1) + LoopArray[18] + 2)>>2);
    else
      PredPel[17] = (imgpel) ((LoopArray[17] + (LoopArray[17]<<1) + LoopArray[18] + 2)>>2);

    for(i = 18; i <24; i++)
    {
      PredPel[i] = (imgpel) ((LoopArray[i-1] + (LoopArray[i]<<1) + LoopArray[i+1] + 2)>>2);
    }
    // last pel of the column has no lower neighbour: the pel itself is used twice more
    PredPel[24] = (imgpel) ((LoopArray[23] + (LoopArray[24]<<1) + LoopArray[24] + 2) >> 2);
  }

  //memcpy(&PredPel[0], &LoopArray[0], 25 * sizeof(imgpel));
}

/*!
 ***********************************************************************
 * \brief
 *    makes and returns 8x8 DC prediction mode
 *
 *    Gathers the predictor pels around the 8x8 block at (ioff, joff),
 *    low-pass filters them and fills the block in currSlice->mb_pred[pl]
 *    with the rounded mean of the filtered pels above and/or to the left,
 *    or with p_Vid->dc_pred_value_comp[pl] when neither side is usable.
 *
 * \return
 *    DECODING_OK   decoding of intraprediction mode was successful     \n
 *
 ***********************************************************************
 */
static inline int intra8x8_dc_pred(Macroblock *currMB,    //!< current macroblock
                                   ColorPlane pl,         //!< current image plane
                                   int ioff,              //!< pixel offset X within MB
                                   int joff)              //!< pixel offset Y within MB
{
  int s0 = 0;
  imgpel PredPel[25];  // array of predictor pels
  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;

  StorablePicture *dec_picture = p_Vid->dec_picture;
  imgpel **imgY = (pl) ? dec_picture->imgUV[pl - 1]->img : dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY

  PixelPos pix_a[8];             // left neighbour of each of the eight block rows
  PixelPos pix_b, pix_c, pix_d;  // neighbours above, above right and above left

  int block_available_up;
  int block_available_left;
  int block_available_up_left;
  int block_available_up_right;

  imgpel *pred_pels;

  // the two block columns use different neighbour lookup helpers
  if (ioff == 0)
  {
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 0, &pix_a[0]);
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 1, &pix_a[1]);
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 2, &pix_a[2]);
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 3, &pix_a[3]);
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 4, &pix_a[4]);
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 5, &pix_a[5]);
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 6, &pix_a[6]);
    p_Vid->getNeighbourNPLumaNB(currMB, joff + 7, &pix_a[7]);

    p_Vid->getNeighbour0XLuma(currMB, joff - 1, &pix_b);
    p_Vid->getNeighbourPXLumaNB(currMB, 8, joff - 1, &pix_c);
    p_Vid->getNeighbourNXLuma(currMB, joff - 1, &pix_d);
  }
  else
  { // ioff == 8
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]);
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]);
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]);
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]);
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]);
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]);
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]);
    p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]);

    p_Vid->getNeighbourPXLumaNB(currMB, ioff    , joff - 1, &pix_b);
    p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
    p_Vid->getNeighbourPXLumaNB(currMB, ioff - 1, joff - 1, &pix_d);
  }
  // the bottom-right 8x8 block never has an above-right neighbour
  pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8);

  if (p_Vid->active_pps->constrained_intra_pred_flag)
  {
    // constrained intra prediction: a neighbour counts only if its MB is flagged in intra_block
    int i;
    for (i=0, block_available_left=1; i<8;i++)
      block_available_left  &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0;
    block_available_up       = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
    block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0;
    block_available_up_left  = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0;
  }
  else
  {
    block_available_left     = pix_a[0].available;
    block_available_up       = pix_b.available;
    block_available_up_right = pix_c.available;
    block_available_up_left  = pix_d.available;
  }

  // form predictor pels
  // PredPel[1..8]: row above, or the default DC value if unavailable
  if (block_available_up)
  {
    pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x];
    PredPel[1] = pred_pels[0];
    PredPel[2] = pred_pels[1];
    PredPel[3] = pred_pels[2];
    PredPel[4] = pred_pels[3];
    PredPel[5] = pred_pels[4];
    PredPel[6] = pred_pels[5];
    PredPel[7] = pred_pels[6];
    PredPel[8] = pred_pels[7];
  }
  else
  {
    PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl];
  }

  // PredPel[9..16]: row above right, or a repeat of the last upper pel
  if (block_available_up_right)
  {
    pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x];
    PredPel[9] = pred_pels[0];
    PredPel[10] = pred_pels[1];
    PredPel[11] = pred_pels[2];
    PredPel[12] = pred_pels[3];
    PredPel[13] = pred_pels[4];
    PredPel[14] = pred_pels[5];
    PredPel[15] = pred_pels[6];
    PredPel[16] = pred_pels[7];

  }
  else
  {
    PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8];
  }

  // PredPel[17..24]: column to the left, or the default DC value.
  // NOTE(review): the column index is taken from pix_a[0] for every row;
  // presumably all eight left neighbours share the same pos_x -- confirm.
  if (block_available_left)
  {
    PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
    PredPel[18] = imgY[pix_a[1].pos_y][pix_a[0].pos_x];
    PredPel[19] = imgY[pix_a[2].pos_y][pix_a[0].pos_x];
    PredPel[20] = imgY[pix_a[3].pos_y][pix_a[0].pos_x];
    PredPel[21] = imgY[pix_a[4].pos_y][pix_a[0].pos_x];
    PredPel[22] = imgY[pix_a[5].pos_y][pix_a[0].pos_x];
    PredPel[23] = imgY[pix_a[6].pos_y][pix_a[0].pos_x];
    PredPel[24] = imgY[pix_a[7].pos_y][pix_a[0].pos_x];
  }
  else
  {
    PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl];
  }

  // PredPel[0]: pel above left, or the default DC value
  if (block_available_up_left)
  {
    PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
  }
  else
  {
    PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
  }

  // smooth the predictor pels before averaging
  LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);

  if (block_available_up && block_available_left)
  {
    // no edge
    s0 = (PredPel[1] + PredPel[2] + PredPel[3] + PredPel[4] + PredPel[5] + PredPel[6] + PredPel[7] + PredPel[8] + PredPel[17] + PredPel[18] + PredPel[19] + PredPel[20] + PredPel[21] + PredPel[22] + PredPel[23] + PredPel[24] + 8) >> 4;
  }
  else if (!block_available_up && block_available_left)
  {
    // upper edge
    s0 = (PredPel[17] + PredPel[18] + PredPel[19] + PredPel[20] + PredPel[21] + PredPel[22] + PredPel[23] + PredPel[24] + 4) >> 3;
  }
  else if (block_available_up && !block_available_left)
  {
    // left edge
    s0 = (PredPel[1] + PredPel[2] + PredPel[3] + PredPel[4] + PredPel[5] + PredPel[6] + PredPel[7] + PredPel[8] + 4) >> 3;
  }
  else //if (!block_available_up && !block_available_left)
  {
    // top left corner, nothing to predict from
    s0 = p_Vid->dc_pred_value_comp[pl];
  }

  // every pel of the 8x8 block gets the same DC value
  memset_8x8(&currSlice->mb_pred[pl][joff], ioff, s0);

  return DECODING_OK;
}

/*!
+ *********************************************************************** + * \brief + * makes and returns 8x8 vertical prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_vert_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + + imgpel *pred_pels; + + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + 
block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if (!block_available_up) + printf ("warning: Intra_8x8_Vertical prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_up) + { + pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x]; + PredPel[1] = *(pred_pels ++); + PredPel[2] = *(pred_pels ++); + PredPel[3] = *(pred_pels ++); + PredPel[4] = *(pred_pels ++); + PredPel[5] = *(pred_pels ++); + PredPel[6] = *(pred_pels ++); + PredPel[7] = *(pred_pels ++); + PredPel[8] = *pred_pels; + } + else + { + PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_right) + { + pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x]; + PredPel[9] = *(pred_pels ++); + PredPel[10] = *(pred_pels ++); + PredPel[11] = *(pred_pels ++); + PredPel[12] = *(pred_pels ++); + PredPel[13] = *(pred_pels ++); + PredPel[14] = *(pred_pels ++); + PredPel[15] = *(pred_pels ++); + PredPel[16] = *pred_pels; + } + else + { + PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8]; + } + + if (block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8PredHor(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); + + 
memset_8x8_row(&currSlice->mb_pred[pl][joff], ioff, &PredPel[1]); + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 8x8 horizontal prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_hor_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + + int i,j; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + int jpos; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, 
block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if (!block_available_left) + printf ("warning: Intra_8x8_Horizontal prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_left) + { + PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8PredVer(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); + + for (j=0; j < BLOCK_SIZE_8x8; j++) + { + jpos = j + joff; + currSlice->mb_pred[pl][jpos][ioff] = + currSlice->mb_pred[pl][jpos][ioff+1] = + currSlice->mb_pred[pl][jpos][ioff+2] = + currSlice->mb_pred[pl][jpos][ioff+3] = + currSlice->mb_pred[pl][jpos][ioff+4] = + currSlice->mb_pred[pl][jpos][ioff+5] = + currSlice->mb_pred[pl][jpos][ioff+6] = + 
currSlice->mb_pred[pl][jpos][ioff+7] = (imgpel) (&PredPel[17])[j]; + } + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 8x8 diagonal down right prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_diag_down_right_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + + int i; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; +h264_imgpel_macroblock_row_t *pred; + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + imgpel *pred_pels; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + 
+ if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_8x8_Diagonal_Down_Right prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_up) + { + pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x]; + PredPel[1] = pred_pels[0]; + PredPel[2] = pred_pels[1]; + PredPel[3] = pred_pels[2]; + PredPel[4] = pred_pels[3]; + PredPel[5] = pred_pels[4]; + PredPel[6] = pred_pels[5]; + PredPel[7] = pred_pels[6]; + PredPel[8] = pred_pels[7]; + } + else + { + PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_right) + { + pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x]; + PredPel[9] = pred_pels[0]; + PredPel[10] = pred_pels[1]; + PredPel[11] = pred_pels[2]; + PredPel[12] = pred_pels[3]; + PredPel[13] = pred_pels[4]; + PredPel[14] = pred_pels[5]; + PredPel[15] = pred_pels[6]; + PredPel[16] = pred_pels[7]; + + } + else + { + PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8]; + } + + if (block_available_left) + { + PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + PredPel[19] = 
imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); + + // Mode DIAG_DOWN_RIGHT_PRED + pred = &currSlice->mb_pred[pl][joff]; + pred[0+7][ioff+0] = (imgpel) ((PredPel[24] + PredPel[22] + 2*(PredPel[23]) + 2) >> 2); + pred[0+6][ioff+0] = + pred[0+7][ioff+1] = (imgpel) ((PredPel[23] + PredPel[21] + 2*(PredPel[22]) + 2) >> 2); + pred[0+5][ioff+0] = + pred[0+6][ioff+1] = + pred[0+7][ioff+2] = (imgpel) ((PredPel[22] + PredPel[20] + 2*(PredPel[21]) + 2) >> 2); + pred[0+4][ioff+0] = + pred[0+5][ioff+1] = + pred[0+6][ioff+2] = + pred[0+7][ioff+3] = (imgpel) ((PredPel[21] + PredPel[19] + 2*(PredPel[20]) + 2) >> 2); + pred[0+3][ioff+0] = + pred[0+4][ioff+1] = + pred[0+5][ioff+2] = + pred[0+6][ioff+3] = + pred[0+7][ioff+4] = (imgpel) ((PredPel[20] + PredPel[18] + 2*(PredPel[19]) + 2) >> 2); + pred[0+2][ioff+0] = + pred[0+3][ioff+1] = + pred[0+4][ioff+2] = + pred[0+5][ioff+3] = + pred[0+6][ioff+4] = + pred[0+7][ioff+5] = (imgpel) ((PredPel[19] + PredPel[17] + 2*(PredPel[18]) + 2) >> 2); + pred[0+1][ioff+0] = + pred[0+2][ioff+1] = + pred[0+3][ioff+2] = + pred[0+4][ioff+3] = + pred[0+5][ioff+4] = + pred[0+6][ioff+5] = + pred[0+7][ioff+6] = (imgpel) ((PredPel[18] + PredPel[0] + 2*(PredPel[17]) + 2) >> 2); + pred[0+0][ioff+0] = + pred[0+1][ioff+1] = + pred[0+2][ioff+2] = + pred[0+3][ioff+3] = + 
pred[0+4][ioff+4] = + pred[0+5][ioff+5] = + pred[0+6][ioff+6] = + pred[0+7][ioff+7] = (imgpel) ((PredPel[17] + PredPel[1] + 2*(PredPel[0]) + 2) >> 2); + pred[0+0][ioff+1] = + pred[0+1][ioff+2] = + pred[0+2][ioff+3] = + pred[0+3][ioff+4] = + pred[0+4][ioff+5] = + pred[0+5][ioff+6] = + pred[0+6][ioff+7] = (imgpel) ((PredPel[0] + PredPel[2] + 2*(PredPel[1]) + 2) >> 2); + pred[0+0][ioff+2] = + pred[0+1][ioff+3] = + pred[0+2][ioff+4] = + pred[0+3][ioff+5] = + pred[0+4][ioff+6] = + pred[0+5][ioff+7] = (imgpel) ((PredPel[1] + PredPel[3] + 2*(PredPel[2]) + 2) >> 2); + pred[0+0][ioff+3] = + pred[0+1][ioff+4] = + pred[0+2][ioff+5] = + pred[0+3][ioff+6] = + pred[0+4][ioff+7] = (imgpel) ((PredPel[2] + PredPel[4] + 2*(PredPel[3]) + 2) >> 2); + pred[0+0][ioff+4] = + pred[0+1][ioff+5] = + pred[0+2][ioff+6] = + pred[0+3][ioff+7] = (imgpel) ((PredPel[3] + PredPel[5] + 2*(PredPel[4]) + 2) >> 2); + pred[0+0][ioff+5] = + pred[0+1][ioff+6] = + pred[0+2][ioff+7] = (imgpel) ((PredPel[4] + PredPel[6] + 2*(PredPel[5]) + 2) >> 2); + pred[0+0][ioff+6] = + pred[0+1][ioff+7] = (imgpel) ((PredPel[5] + PredPel[7] + 2*(PredPel[6]) + 2) >> 2); + pred[0+0][ioff+7] = (imgpel) ((PredPel[6] + PredPel[8] + 2*(PredPel[7]) + 2) >> 2); + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 8x8 diagonal down left prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_diag_down_left_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + h264_imgpel_macroblock_row_t *pred; + int i; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + + imgpel *pred_pels; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if (!block_available_up) + printf ("warning: Intra_8x8_Diagonal_Down_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_up) + { + pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x]; + PredPel[1] = pred_pels[0]; + PredPel[2] = pred_pels[1]; + PredPel[3] = pred_pels[2]; + PredPel[4] = pred_pels[3]; + PredPel[5] = pred_pels[4]; + PredPel[6] = pred_pels[5]; + PredPel[7] = pred_pels[6]; + PredPel[8] = pred_pels[7]; + } + else + { + PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_right) + { + pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x]; + PredPel[9] = pred_pels[0]; + PredPel[10] = pred_pels[1]; + PredPel[11] = pred_pels[2]; + PredPel[12] = pred_pels[3]; + PredPel[13] = pred_pels[4]; + PredPel[14] = pred_pels[5]; + PredPel[15] = pred_pels[6]; + PredPel[16] = pred_pels[7]; + + } + else + { + PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8]; + } + + if (block_available_left) + { + PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if 
(block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); + + // Mode DIAG_DOWN_LEFT_PRED + pred = &currSlice->mb_pred[pl][joff]; + pred[0+0][ioff+0] = (imgpel) ((PredPel[1] + PredPel[3] + 2*(PredPel[2]) + 2) >> 2); + pred[0+1][ioff+0] = + pred[0+0][ioff+1] = (imgpel) ((PredPel[2] + PredPel[4] + 2*(PredPel[3]) + 2) >> 2); + pred[0+2][ioff+0] = + pred[0+1][ioff+1] = + pred[0+0][ioff+2] = (imgpel) ((PredPel[3] + PredPel[5] + 2*(PredPel[4]) + 2) >> 2); + pred[0+3][ioff+0] = + pred[0+2][ioff+1] = + pred[0+1][ioff+2] = + pred[0+0][ioff+3] = (imgpel) ((PredPel[4] + PredPel[6] + 2*(PredPel[5]) + 2) >> 2); + pred[0+4][ioff+0] = + pred[0+3][ioff+1] = + pred[0+2][ioff+2] = + pred[0+1][ioff+3] = + pred[0+0][ioff+4] = (imgpel) ((PredPel[5] + PredPel[7] + 2*(PredPel[6]) + 2) >> 2); + pred[0+5][ioff+0] = + pred[0+4][ioff+1] = + pred[0+3][ioff+2] = + pred[0+2][ioff+3] = + pred[0+1][ioff+4] = + pred[0+0][ioff+5] = (imgpel) ((PredPel[6] + PredPel[8] + 2*(PredPel[7]) + 2) >> 2); + pred[0+6][ioff+0] = + pred[0+5][ioff+1] = + pred[0+4][ioff+2] = + pred[0+3][ioff+3] = + pred[0+2][ioff+4] = + pred[0+1][ioff+5] = + pred[0+0][ioff+6] = (imgpel) ((PredPel[7] + PredPel[9] + 2*(PredPel[8]) + 2) >> 2); + pred[0+7][ioff+0] = + pred[0+6][ioff+1] = + pred[0+5][ioff+2] = + pred[0+4][ioff+3] = + pred[0+3][ioff+4] = + pred[0+2][ioff+5] = + pred[0+1][ioff+6] = + pred[0+0][ioff+7] = (imgpel) ((PredPel[8] + PredPel[10] + 2*(PredPel[9]) + 2) >> 2); + pred[0+7][ioff+1] = + pred[0+6][ioff+2] = + pred[0+5][ioff+3] = + pred[0+4][ioff+4] = + pred[0+3][ioff+5] = + pred[0+2][ioff+6] = + pred[0+1][ioff+7] = (imgpel) ((PredPel[9] + PredPel[11] + 2*(PredPel[10]) + 2) >> 2); + pred[0+7][ioff+2] = + pred[0+6][ioff+3] = + pred[0+5][ioff+4] = + pred[0+4][ioff+5] = + pred[0+3][ioff+6] = + pred[0+2][ioff+7] = (imgpel) 
((PredPel[10] + PredPel[12] + 2*(PredPel[11]) + 2) >> 2); + pred[0+7][ioff+3] = + pred[0+6][ioff+4] = + pred[0+5][ioff+5] = + pred[0+4][ioff+6] = + pred[0+3][ioff+7] = (imgpel) ((PredPel[11] + PredPel[13] + 2*(PredPel[12]) + 2) >> 2); + pred[0+7][ioff+4] = + pred[0+6][ioff+5] = + pred[0+5][ioff+6] = + pred[0+4][ioff+7] = (imgpel) ((PredPel[12] + PredPel[14] + 2*(PredPel[13]) + 2) >> 2); + pred[0+7][ioff+5] = + pred[0+6][ioff+6] = + pred[0+5][ioff+7] = (imgpel) ((PredPel[13] + PredPel[15] + 2*(PredPel[14]) + 2) >> 2); + pred[0+7][ioff+6] = + pred[0+6][ioff+7] = (imgpel) ((PredPel[14] + PredPel[16] + 2*(PredPel[15]) + 2) >> 2); + pred[0+7][ioff+7] = (imgpel) ((PredPel[15] + 3*(PredPel[16]) + 2) >> 2); + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 8x8 vertical right prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_vert_right_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + h264_imgpel_macroblock_row_t *pred; + int i; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + imgpel *pred_pels; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_8x8_Vertical_Right prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_up) + { + pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x]; + PredPel[1] = pred_pels[0]; + PredPel[2] = pred_pels[1]; + PredPel[3] = pred_pels[2]; + PredPel[4] = pred_pels[3]; + PredPel[5] = pred_pels[4]; + PredPel[6] = pred_pels[5]; + PredPel[7] = pred_pels[6]; + PredPel[8] = pred_pels[7]; + } + else + { + PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_right) + { + pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x]; + PredPel[9] = pred_pels[0]; + PredPel[10] = pred_pels[1]; + PredPel[11] = pred_pels[2]; + PredPel[12] = pred_pels[3]; + PredPel[13] = pred_pels[4]; + PredPel[14] = pred_pels[5]; + PredPel[15] = pred_pels[6]; + PredPel[16] = pred_pels[7]; + + } + else + { + PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8]; + } + + if (block_available_left) + { + PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = 
(imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); +pred = &currSlice->mb_pred[pl][joff]; + pred[0+0][ioff+0] = + pred[0+2][ioff+1] = + pred[0+4][ioff+2] = + pred[0+6][ioff+3] = (imgpel) ((PredPel[0] + PredPel[1] + 1) >> 1); + pred[0+0][ioff+1] = + pred[0+2][ioff+2] = + pred[0+4][ioff+3] = + pred[0+6][ioff+4] = (imgpel) ((PredPel[1] + PredPel[2] + 1) >> 1); + pred[0+0][ioff+2] = + pred[0+2][ioff+3] = + pred[0+4][ioff+4] = + pred[0+6][ioff+5] = (imgpel) ((PredPel[2] + PredPel[3] + 1) >> 1); + pred[0+0][ioff+3] = + pred[0+2][ioff+4] = + pred[0+4][ioff+5] = + pred[0+6][ioff+6] = (imgpel) ((PredPel[3] + PredPel[4] + 1) >> 1); + pred[0+0][ioff+4] = + pred[0+2][ioff+5] = + pred[0+4][ioff+6] = + pred[0+6][ioff+7] = (imgpel) ((PredPel[4] + PredPel[5] + 1) >> 1); + pred[0+0][ioff+5] = + pred[0+2][ioff+6] = + pred[0+4][ioff+7] = (imgpel) ((PredPel[5] + PredPel[6] + 1) >> 1); + pred[0+0][ioff+6] = + pred[0+2][ioff+7] = (imgpel) ((PredPel[6] + PredPel[7] + 1) >> 1); + pred[0+0][ioff+7] = (imgpel) ((PredPel[7] + PredPel[8] + 1) >> 1); + pred[0+1][ioff+0] = + pred[0+3][ioff+1] = + pred[0+5][ioff+2] = + pred[0+7][ioff+3] = (imgpel) ((PredPel[17] + PredPel[1] + 2*PredPel[0] + 2) >> 2); + pred[0+1][ioff+1] = + pred[0+3][ioff+2] = + pred[0+5][ioff+3] = + pred[0+7][ioff+4] = (imgpel) ((PredPel[0] + PredPel[2] + 2*PredPel[1] + 2) >> 2); + pred[0+1][ioff+2] = + pred[0+3][ioff+3] = + pred[0+5][ioff+4] = + pred[0+7][ioff+5] = (imgpel) ((PredPel[1] + PredPel[3] + 2*PredPel[2] + 2) >> 2); + pred[0+1][ioff+3] = + pred[0+3][ioff+4] = + pred[0+5][ioff+5] = + pred[0+7][ioff+6] = (imgpel) ((PredPel[2] + PredPel[4] + 2*PredPel[3] + 2) >> 2); + pred[0+1][ioff+4] = + pred[0+3][ioff+5] = + pred[0+5][ioff+6] = + pred[0+7][ioff+7] = (imgpel) 
((PredPel[3] + PredPel[5] + 2*PredPel[4] + 2) >> 2); + pred[0+1][ioff+5] = + pred[0+3][ioff+6] = + pred[0+5][ioff+7] = (imgpel) ((PredPel[4] + PredPel[6] + 2*PredPel[5] + 2) >> 2); + pred[0+1][ioff+6] = + pred[0+3][ioff+7] = (imgpel) ((PredPel[5] + PredPel[7] + 2*PredPel[6] + 2) >> 2); + pred[0+1][ioff+7] = (imgpel) ((PredPel[6] + PredPel[8] + 2*PredPel[7] + 2) >> 2); + pred[0+2][ioff+0] = + pred[0+4][ioff+1] = + pred[0+6][ioff+2] = (imgpel) ((PredPel[18] + PredPel[0] + 2*PredPel[17] + 2) >> 2); + pred[0+3][ioff+0] = + pred[0+5][ioff+1] = + pred[0+7][ioff+2] = (imgpel) ((PredPel[19] + PredPel[17] + 2*PredPel[18] + 2) >> 2); + pred[0+4][ioff+0] = + pred[0+6][ioff+1] = (imgpel) ((PredPel[20] + PredPel[18] + 2*PredPel[19] + 2) >> 2); + pred[0+5][ioff+0] = + pred[0+7][ioff+1] = (imgpel) ((PredPel[21] + PredPel[19] + 2*PredPel[20] + 2) >> 2); + pred[0+6][ioff+0] = (imgpel) ((PredPel[22] + PredPel[20] + 2*PredPel[21] + 2) >> 2); + pred[0+7][ioff+0] = (imgpel) ((PredPel[23] + PredPel[21] + 2*PredPel[22] + 2) >> 2); + + return DECODING_OK; +} + + +/*! + *********************************************************************** + * \brief + * makes and returns 8x8 vertical left prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_vert_left_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; +h264_imgpel_macroblock_row_t *pred; + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + imgpel *pred_pels; + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if (!block_available_up) + printf ("warning: Intra_4x4_Vertical_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_up) + { + pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x]; + PredPel[1] = pred_pels[0]; + PredPel[2] = pred_pels[1]; + PredPel[3] = pred_pels[2]; + PredPel[4] = pred_pels[3]; + PredPel[5] = pred_pels[4]; + PredPel[6] = pred_pels[5]; + PredPel[7] = pred_pels[6]; + PredPel[8] = pred_pels[7]; + } + else + { + PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_right) + { + pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x]; + PredPel[9] = pred_pels[0]; + PredPel[10] = pred_pels[1]; + PredPel[11] = pred_pels[2]; + PredPel[12] = pred_pels[3]; + PredPel[13] = pred_pels[4]; + PredPel[14] = pred_pels[5]; + PredPel[15] = pred_pels[6]; + PredPel[16] = pred_pels[7]; + + } + else + { + PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8]; + } + + if (block_available_left) + { + PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if 
(block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); +pred = &currSlice->mb_pred[pl][joff]; + pred[0+0][ioff+0] = (imgpel) ((PredPel[1] + PredPel[2] + 1) >> 1); + pred[0+0][ioff+1] = + pred[0+2][ioff+0] = (imgpel) ((PredPel[2] + PredPel[3] + 1) >> 1); + pred[0+0][ioff+2] = + pred[0+2][ioff+1] = + pred[0+4][ioff+0] = (imgpel) ((PredPel[3] + PredPel[4] + 1) >> 1); + pred[0+0][ioff+3] = + pred[0+2][ioff+2] = + pred[0+4][ioff+1] = + pred[0+6][ioff+0] = (imgpel) ((PredPel[4] + PredPel[5] + 1) >> 1); + pred[0+0][ioff+4] = + pred[0+2][ioff+3] = + pred[0+4][ioff+2] = + pred[0+6][ioff+1] = (imgpel) ((PredPel[5] + PredPel[6] + 1) >> 1); + pred[0+0][ioff+5] = + pred[0+2][ioff+4] = + pred[0+4][ioff+3] = + pred[0+6][ioff+2] = (imgpel) ((PredPel[6] + PredPel[7] + 1) >> 1); + pred[0+0][ioff+6] = + pred[0+2][ioff+5] = + pred[0+4][ioff+4] = + pred[0+6][ioff+3] = (imgpel) ((PredPel[7] + PredPel[8] + 1) >> 1); + pred[0+0][ioff+7] = + pred[0+2][ioff+6] = + pred[0+4][ioff+5] = + pred[0+6][ioff+4] = (imgpel) ((PredPel[8] + PredPel[9] + 1) >> 1); + pred[0+2][ioff+7] = + pred[0+4][ioff+6] = + pred[0+6][ioff+5] = (imgpel) ((PredPel[9] + PredPel[10] + 1) >> 1); + pred[0+4][ioff+7] = + pred[0+6][ioff+6] = (imgpel) ((PredPel[10] + PredPel[11] + 1) >> 1); + pred[0+6][ioff+7] = (imgpel) ((PredPel[11] + PredPel[12] + 1) >> 1); + pred[0+1][ioff+0] = (imgpel) ((PredPel[1] + PredPel[3] + 2*PredPel[2] + 2) >> 2); + pred[0+1][ioff+1] = + pred[0+3][ioff+0] = (imgpel) ((PredPel[2] + PredPel[4] + 2*PredPel[3] + 2) >> 2); + pred[0+1][ioff+2] = + pred[0+3][ioff+1] = + pred[0+5][ioff+0] = (imgpel) ((PredPel[3] + PredPel[5] + 2*PredPel[4] + 2) >> 2); + pred[0+1][ioff+3] = + pred[0+3][ioff+2] = + pred[0+5][ioff+1] = + pred[0+7][ioff+0] = (imgpel) ((PredPel[4] + PredPel[6] + 2*PredPel[5] + 2) >> 2); + 
pred[0+1][ioff+4] = + pred[0+3][ioff+3] = + pred[0+5][ioff+2] = + pred[0+7][ioff+1] = (imgpel) ((PredPel[5] + PredPel[7] + 2*PredPel[6] + 2) >> 2); + pred[0+1][ioff+5] = + pred[0+3][ioff+4] = + pred[0+5][ioff+3] = + pred[0+7][ioff+2] = (imgpel) ((PredPel[6] + PredPel[8] + 2*PredPel[7] + 2) >> 2); + pred[0+1][ioff+6] = + pred[0+3][ioff+5] = + pred[0+5][ioff+4] = + pred[0+7][ioff+3] = (imgpel) ((PredPel[7] + PredPel[9] + 2*PredPel[8] + 2) >> 2); + pred[0+1][ioff+7] = + pred[0+3][ioff+6] = + pred[0+5][ioff+5] = + pred[0+7][ioff+4] = (imgpel) ((PredPel[8] + PredPel[10] + 2*PredPel[9] + 2) >> 2); + pred[0+3][ioff+7] = + pred[0+5][ioff+6] = + pred[0+7][ioff+5] = (imgpel) ((PredPel[9] + PredPel[11] + 2*PredPel[10] + 2) >> 2); + pred[0+5][ioff+7] = + pred[0+7][ioff+6] = (imgpel) ((PredPel[10] + PredPel[12] + 2*PredPel[11] + 2) >> 2); + pred[0+7][ioff+7] = (imgpel) ((PredPel[11] + PredPel[13] + 2*PredPel[12] + 2) >> 2); + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 8x8 horizontal up prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_hor_up_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY +h264_imgpel_macroblock_row_t *pred; + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + imgpel *pred_pels; + + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if (!block_available_left) + printf ("warning: Intra_8x8_Horizontal_Up prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_up) + { + pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x]; + PredPel[1] = pred_pels[0]; + PredPel[2] = pred_pels[1]; + PredPel[3] = pred_pels[2]; + PredPel[4] = pred_pels[3]; + PredPel[5] = pred_pels[4]; + PredPel[6] = pred_pels[5]; + PredPel[7] = pred_pels[6]; + PredPel[8] = pred_pels[7]; + } + else + { + PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_right) + { + pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x]; + PredPel[9] = pred_pels[0]; + PredPel[10] = pred_pels[1]; + PredPel[11] = pred_pels[2]; + PredPel[12] = pred_pels[3]; + PredPel[13] = pred_pels[4]; + PredPel[14] = pred_pels[5]; + PredPel[15] = pred_pels[6]; + PredPel[16] = pred_pels[7]; + + } + else + { + PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8]; + } + + if (block_available_left) + { + PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if 
(block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); +pred = &currSlice->mb_pred[pl][joff]; + pred[0+0][ioff+0] = (imgpel) ((PredPel[17] + PredPel[18] + 1) >> 1); + pred[0+1][ioff+0] = + pred[0+0][ioff+2] = (imgpel) ((PredPel[18] + PredPel[19] + 1) >> 1); + pred[0+2][ioff+0] = + pred[0+1][ioff+2] = + pred[0+0][ioff+4] = (imgpel) ((PredPel[19] + PredPel[20] + 1) >> 1); + pred[0+3][ioff+0] = + pred[0+2][ioff+2] = + pred[0+1][ioff+4] = + pred[0+0][ioff+6] = (imgpel) ((PredPel[20] + PredPel[21] + 1) >> 1); + pred[0+4][ioff+0] = + pred[0+3][ioff+2] = + pred[0+2][ioff+4] = + pred[0+1][ioff+6] = (imgpel) ((PredPel[21] + PredPel[22] + 1) >> 1); + pred[0+5][ioff+0] = + pred[0+4][ioff+2] = + pred[0+3][ioff+4] = + pred[0+2][ioff+6] = (imgpel) ((PredPel[22] + PredPel[23] + 1) >> 1); + pred[0+6][ioff+0] = + pred[0+5][ioff+2] = + pred[0+4][ioff+4] = + pred[0+3][ioff+6] = (imgpel) ((PredPel[23] + PredPel[24] + 1) >> 1); + pred[0+4][ioff+6] = + pred[0+4][ioff+7] = + pred[0+5][ioff+4] = + pred[0+5][ioff+5] = + pred[0+5][ioff+6] = + pred[0+5][ioff+7] = + pred[0+6][ioff+2] = + pred[0+6][ioff+3] = + pred[0+6][ioff+4] = + pred[0+6][ioff+5] = + pred[0+6][ioff+6] = + pred[0+6][ioff+7] = + pred[0+7][ioff+0] = + pred[0+7][ioff+1] = + pred[0+7][ioff+2] = + pred[0+7][ioff+3] = + pred[0+7][ioff+4] = + pred[0+7][ioff+5] = + pred[0+7][ioff+6] = + pred[0+7][ioff+7] = (imgpel) PredPel[24]; + pred[0+6][ioff+1] = + pred[0+5][ioff+3] = + pred[0+4][ioff+5] = + pred[0+3][ioff+7] = (imgpel) ((PredPel[23] + 3*PredPel[24] + 2) >> 2); + pred[0+5][ioff+1] = + pred[0+4][ioff+3] = + pred[0+3][ioff+5] = + pred[0+2][ioff+7] = (imgpel) ((PredPel[24] + PredPel[22] + 2*PredPel[23] + 2) >> 2); + pred[0+4][ioff+1] = + pred[0+3][ioff+3] = + pred[0+2][ioff+5] = + pred[0+1][ioff+7] = (imgpel) ((PredPel[23] + 
PredPel[21] + 2*PredPel[22] + 2) >> 2); + pred[0+3][ioff+1] = + pred[0+2][ioff+3] = + pred[0+1][ioff+5] = + pred[0+0][ioff+7] = (imgpel) ((PredPel[22] + PredPel[20] + 2*PredPel[21] + 2) >> 2); + pred[0+2][ioff+1] = + pred[0+1][ioff+3] = + pred[0+0][ioff+5] = (imgpel) ((PredPel[21] + PredPel[19] + 2*PredPel[20] + 2) >> 2); + pred[0+1][ioff+1] = + pred[0+0][ioff+3] = (imgpel) ((PredPel[20] + PredPel[18] + 2*PredPel[19] + 2) >> 2); + pred[0+0][ioff+1] = (imgpel) ((PredPel[19] + PredPel[17] + 2*PredPel[18] + 2) >> 2); + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 8x8 horizontal down prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra8x8_hor_down_pred(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< current image plane + int ioff, //!< pixel offset X within MB + int joff) //!< pixel offset Y within MB +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i; + imgpel PredPel[25]; // array of predictor pels + imgpel **imgY = (pl) ? 
p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; + h264_imgpel_macroblock_row_t *pred; + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + imgpel *pred_pels; + + + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]); + p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]); + + p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b); + p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c); + p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d); + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
p_Vid->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_8x8_Horizontal_Down prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr); + + // form predictor pels + if (block_available_up) + { + pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x]; + PredPel[1] = pred_pels[0]; + PredPel[2] = pred_pels[1]; + PredPel[3] = pred_pels[2]; + PredPel[4] = pred_pels[3]; + PredPel[5] = pred_pels[4]; + PredPel[6] = pred_pels[5]; + PredPel[7] = pred_pels[6]; + PredPel[8] = pred_pels[7]; + } + else + { + PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_right) + { + pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x]; + PredPel[9] = pred_pels[0]; + PredPel[10] = pred_pels[1]; + PredPel[11] = pred_pels[2]; + PredPel[12] = pred_pels[3]; + PredPel[13] = pred_pels[4]; + PredPel[14] = pred_pels[5]; + PredPel[15] = pred_pels[6]; + PredPel[16] = pred_pels[7]; + + } + else + { + PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8]; + } + + if (block_available_left) + { + PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] 
= (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + if (block_available_up_left) + { + PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl]; + } + + LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left); +pred = &currSlice->mb_pred[pl][joff]; + pred[0][ioff] = + pred[0+1][ioff+2] = + pred[0+2][ioff+4] = + pred[0+3][ioff+6] = (imgpel) ((PredPel[17] + PredPel[0] + 1) >> 1); + pred[0+1][ioff] = + pred[0+2][ioff+2] = + pred[0+3][ioff+4] = + pred[0+4][ioff+6] = (imgpel) ((PredPel[18] + PredPel[17] + 1) >> 1); + pred[0+2][ioff] = + pred[0+3][ioff+2] = + pred[0+4][ioff+4] = + pred[0+5][ioff+6] = (imgpel) ((PredPel[19] + PredPel[18] + 1) >> 1); + pred[0+3][ioff] = + pred[0+4][ioff+2] = + pred[0+5][ioff+4] = + pred[0+6][ioff+6] = (imgpel) ((PredPel[20] + PredPel[19] + 1) >> 1); + pred[0+4][ioff] = + pred[0+5][ioff+2] = + pred[0+6][ioff+4] = + pred[0+7][ioff+6] = (imgpel) ((PredPel[21] + PredPel[20] + 1) >> 1); + pred[0+5][ioff] = + pred[0+6][ioff+2] = + pred[0+7][ioff+4] = (imgpel) ((PredPel[22] + PredPel[21] + 1) >> 1); + pred[0+6][ioff] = + pred[0+7][ioff+2] = (imgpel) ((PredPel[23] + PredPel[22] + 1) >> 1); + pred[0+7][ioff] = (imgpel) ((PredPel[24] + PredPel[23] + 1) >> 1); + pred[0][ioff+1] = + pred[0+1][ioff+3] = + pred[0+2][ioff+5] = + pred[0+3][ioff+7] = (imgpel) ((PredPel[17] + PredPel[1] + 2*PredPel[0] + 2) >> 2); + pred[0+1][ioff+1] = + pred[0+2][ioff+3] = + pred[0+3][ioff+5] = + pred[0+4][ioff+7] = (imgpel) ((PredPel[0] + PredPel[18] + 2*PredPel[17] + 2) >> 2); + pred[0+2][ioff+1] = + pred[0+3][ioff+3] = + pred[0+4][ioff+5] = + pred[0+5][ioff+7] = (imgpel) ((PredPel[17] + PredPel[19] + 2*PredPel[18] + 2) >> 2); + pred[0+3][ioff+1] = + pred[0+4][ioff+3] = + pred[0+5][ioff+5] = + pred[0+6][ioff+7] = (imgpel) ((PredPel[18] + PredPel[20] + 2*PredPel[19] + 2) >> 2); + pred[0+4][ioff+1] = + pred[0+5][ioff+3] = + pred[0+6][ioff+5] = + pred[0+7][ioff+7] = 
(imgpel) ((PredPel[19] + PredPel[21] + 2*PredPel[20] + 2) >> 2); + pred[0+5][ioff+1] = + pred[0+6][ioff+3] = + pred[0+7][ioff+5] = (imgpel) ((PredPel[20] + PredPel[22] + 2*PredPel[21] + 2) >> 2); + pred[0+6][ioff+1] = + pred[0+7][ioff+3] = (imgpel) ((PredPel[21] + PredPel[23] + 2*PredPel[22] + 2) >> 2); + pred[0+7][ioff+1] = (imgpel) ((PredPel[22] + PredPel[24] + 2*PredPel[23] + 2) >> 2); + pred[0][ioff+2] = + pred[0+1][ioff+4] = + pred[0+2][ioff+6] = (imgpel) ((PredPel[0] + PredPel[2] + 2*PredPel[1] + 2) >> 2); + pred[0][ioff+3] = + pred[0+1][ioff+5] = + pred[0+2][ioff+7] = (imgpel) ((PredPel[1] + PredPel[3] + 2*PredPel[2] + 2) >> 2); + pred[0][ioff+4] = + pred[0+1][ioff+6] = (imgpel) ((PredPel[2] + PredPel[4] + 2*PredPel[3] + 2) >> 2); + pred[0][ioff+5] = + pred[0+1][ioff+7] = (imgpel) ((PredPel[3] + PredPel[5] + 2*PredPel[4] + 2) >> 2); + pred[0][ioff+6] = (imgpel) ((PredPel[4] + PredPel[6] + 2*PredPel[5] + 2) >> 2); + pred[0][ioff+7] = (imgpel) ((PredPel[5] + PredPel[7] + 2*PredPel[6] + 2) >> 2); + + return DECODING_OK; +} + +/*! + ************************************************************************ + * \brief + * Make intra 8x8 prediction according to all 9 prediction modes. + * The routine uses left and upper neighbouring points from + * previous coded blocks to do this (if available). Notice that + * inaccessible neighbouring points are signalled with a negative + * value in the predmode array . 
+ * + * \par Input: + * Starting point of current 8x8 block image position + * + ************************************************************************ + */ +int intrapred8x8(Macroblock *currMB, //!< Current Macroblock + ColorPlane pl, //!< Current color plane + int ioff, //!< ioff + int joff) //!< joff + +{ + VideoParameters *p_Vid = currMB->p_Vid; + int block_x = (currMB->block_x) + (ioff >> 2); + int block_y = (currMB->block_y) + (joff >> 2); + byte predmode = p_Vid->ipredmode[block_y][block_x]; + + currMB->ipmode_DPCM = predmode; //For residual DPCM + + switch (predmode) + { + case DC_PRED: + return (intra8x8_dc_pred(currMB, pl, ioff, joff)); + break; + case VERT_PRED: + return (intra8x8_vert_pred(currMB, pl, ioff, joff)); + break; + case HOR_PRED: + return (intra8x8_hor_pred(currMB, pl, ioff, joff)); + break; + case DIAG_DOWN_RIGHT_PRED: + return (intra8x8_diag_down_right_pred(currMB, pl, ioff, joff)); + break; + case DIAG_DOWN_LEFT_PRED: + return (intra8x8_diag_down_left_pred(currMB, pl, ioff, joff)); + break; + case VERT_RIGHT_PRED: + return (intra8x8_vert_right_pred(currMB, pl, ioff, joff)); + break; + case VERT_LEFT_PRED: + return (intra8x8_vert_left_pred(currMB, pl, ioff, joff)); + break; + case HOR_UP_PRED: + return (intra8x8_hor_up_pred(currMB, pl, ioff, joff)); + break; + case HOR_DOWN_PRED: + return (intra8x8_hor_down_pred(currMB, pl, ioff, joff)); + default: + printf("Error: illegal intra_8x8 prediction mode: %d\n", (int) predmode); + return SEARCH_SYNC; + break; + } + + return DECODING_OK; +} + + diff --git a/Src/h264dec/ldecod/src/intra_chroma_pred.c b/Src/h264dec/ldecod/src/intra_chroma_pred.c new file mode 100644 index 00000000..5c45ec6b --- /dev/null +++ b/Src/h264dec/ldecod/src/intra_chroma_pred.c @@ -0,0 +1,357 @@ +/*! 
+************************************************************************************* +* \file intra_chroma_pred.c +* +* \brief +* Functions for intra chroma prediction +* +* \author +* Main contributors (see contributors.h for copyright, +* address and affiliation details) +* - Alexis Michael Tourapis <alexismt@ieee.org> +* +************************************************************************************* +*/ +#include "global.h" +#include "block.h" +#include "mb_access.h" +#include "image.h" + +static void intra_chroma_DC_single(imgpel **curr_img, int up_avail, int left_avail, PixelPos up, PixelPos left[17], int blk_x, int blk_y, int *pred, int direction ) +{ + int s0; + + if ((direction && up_avail) || (!left_avail && up_avail)) + { + imgpel *img = &curr_img[up.pos_y][up.pos_x + blk_x]; + s0 = img[0] + img[1] + img[2] + img[3]; + *pred = (s0+2) >> 2; + } + else if (left_avail) + { + s0 = curr_img[left[blk_y].pos_y][left[blk_y].pos_x]; + s0 += curr_img[left[blk_y+1].pos_y][left[blk_y+1].pos_x]; + s0 += curr_img[left[blk_y+2].pos_y][left[blk_y+2].pos_x]; + s0 += curr_img[left[blk_y+3].pos_y][left[blk_y+3].pos_x]; + + *pred = (s0+2) >> 2; + } +} + + +static void intra_chroma_DC_all(imgpel **curr_img, int up_avail, int left_avail, PixelPos up, PixelPos left[17], int blk_x, int blk_y, int *pred ) +{ + int s0 = 0, s1 = 0; + + if (up_avail) + { + imgpel *img = &curr_img[up.pos_y][up.pos_x + blk_x]; + s0 = img[0] + img[1] + img[2] + img[3]; + } + + if (left_avail) + { + s1 += curr_img[left[blk_y].pos_y][left[blk_y].pos_x]; + s1 += curr_img[left[blk_y+1].pos_y][left[blk_y+1].pos_x]; + s1 += curr_img[left[blk_y+2].pos_y][left[blk_y+2].pos_x]; + s1 += curr_img[left[blk_y+3].pos_y][left[blk_y+3].pos_x]; + } + + if (up_avail && left_avail) + *pred = (s0 + s1 + 4) >> 3; + else if (up_avail) + *pred = (s0 + 2) >> 2; + else if (left_avail) + *pred = (s1 + 2) >> 2; +} + +/*! 
+************************************************************************ +* \brief +* Chroma Intra prediction. Note that many operations can be moved +* outside since they are repeated for both components for no reason. +************************************************************************ +*/ + +static void memset_4x4(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, int pred) +{ +#ifdef _M_IX86 + // benski> can't believe the shitty code that the compiler generated... this code is better + int dword_pred = pred * 0x01010101; + mb_pred = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x]; + *(int *)mb_pred[0] = dword_pred; + *(int *)mb_pred[1] = dword_pred; + *(int *)mb_pred[2] = dword_pred; + *(int *)mb_pred[3] = dword_pred; +#else + int ii, jj; + for (jj = 0; jj < BLOCK_SIZE; jj++) + { + for (ii = 0; ii < BLOCK_SIZE; ii++) + { + mb_pred[jj][offset_x+ii]=(imgpel) pred; + } + } +#endif +} + +static void chroma_dc_pred8(VideoParameters *p_Vid, int yuv, imgpel **imgUV, int up_avail, int left_avail[2], PixelPos up, PixelPos left[17], h264_imgpel_macroblock_row_t *mb_pred) +{ + static const byte block_pos[3][4][4]= //[yuv][b8][b4] + { + { {0, 1, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0},{0, 0, 0, 0}}, + { {0, 1, 2, 3},{2, 3, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0}}, + { {0, 1, 2, 3},{1, 1, 3, 3},{2, 3, 2, 3},{3, 3, 3, 3}} + }; + + int b8, b4; + int pred; + + // DC prediction + // Note that unlike what is stated in many presentations and papers, this mode does not operate + // the same way as I_16x16 DC prediction. 
+ + for(b8 = 0; b8 < (p_Vid->num_uv_blocks) ;b8++) + { + for (b4 = 0; b4 < 4; b4++) + { + int blk_y = subblk_offset_y[yuv][b8][b4]; + int blk_x = subblk_offset_x[yuv][b8][b4]; + + pred = p_Vid->dc_pred_value_comp[1]; + + //===== get prediction value ===== + switch (block_pos[yuv][b8][b4]) + { + case 0: //===== TOP LEFT ===== + intra_chroma_DC_all (imgUV, up_avail, left_avail[0], up, left, blk_x, blk_y + 1, &pred); + break; + case 1: //===== TOP RIGHT ===== + intra_chroma_DC_single(imgUV, up_avail, left_avail[0], up, left, blk_x, blk_y + 1, &pred, 1); + break; + case 2: //===== BOTTOM LEFT ===== + intra_chroma_DC_single(imgUV, up_avail, left_avail[1], up, left, blk_x, blk_y + 1, &pred, 0); + break; + case 3: //===== BOTTOM RIGHT ===== + intra_chroma_DC_all (imgUV, up_avail, left_avail[1], up, left, blk_x, blk_y + 1, &pred); + break; + } + + memset_4x4(mb_pred+blk_y, blk_x, pred); + } + } + +} + +static void chroma_pred_horiz8(int cr_MB_x, int cr_MB_y, PixelPos left[17], imgpel **imgUV, h264_imgpel_macroblock_row_t *mb_pred) +{ + // Horizontal Prediction + int i,j; + + if (cr_MB_x == 8) + { + for (j = 0; j < cr_MB_y; ++j) + { + + int pred = imgUV[left[1 + j].pos_y][left[1 + j].pos_x]; + for (i = 0; i < 8; ++i) + mb_pred[j][i]=(imgpel) pred; + } + } + else + { + assert(cr_MB_x == 16); + for (j = 0; j < cr_MB_y; ++j) + { + + int pred = imgUV[left[1 + j].pos_y][left[1 + j].pos_x]; + for (i = 0; i < 16; ++i) + mb_pred[j][i]=(imgpel) pred; + } + } +} + +static void chroma_pred_vert8(int cr_MB_x, int cr_MB_y, PixelPos up, imgpel **imgUV, h264_imgpel_macroblock_row_t *mb_pred) +{ + // Vertical Prediction + const imgpel *source = &(imgUV[up.pos_y][up.pos_x]); + if (cr_MB_x == 8) + { + int j; + for (j = 0; j < cr_MB_y; ++j) + { + memcpy(mb_pred[j], source, 8 * sizeof(imgpel)); + } + } + else + { + int j; + assert(cr_MB_x == 16); + + + for (j = 0; j < cr_MB_y; ++j) + { + memcpy(mb_pred[j], source, 16 * sizeof(imgpel)); + } + } +} + +static void chroma_pred_plane8(int cr_MB_x, 
int cr_MB_y, int cr_MB_x2, int cr_MB_y2, PixelPos up, PixelPos left[17], int max_imgpel_value, imgpel **imgUV, h264_imgpel_macroblock_row_t *mb_pred) +{ + int ih, iv, ib, ic, i, j, iaa; + imgpel *upPred = &imgUV[up.pos_y][up.pos_x]; + + ih = cr_MB_x2 * (upPred[cr_MB_x - 1] - imgUV[left[0].pos_y][left[0].pos_x]); + for (i = 0; i < cr_MB_x2 - 1; ++i) + ih += (i + 1) * (upPred[cr_MB_x2 + i] - upPred[cr_MB_x2 - 2 - i]); + + iv = cr_MB_y2 * (imgUV[left[cr_MB_y].pos_y][left[cr_MB_y].pos_x] - imgUV[left[0].pos_y][left[0].pos_x]); + for (i = 0; i < cr_MB_y2 - 1; ++i) + iv += (i + 1)*(imgUV[left[cr_MB_y2 + 1 + i].pos_y][left[cr_MB_y2 + 1 + i].pos_x] - + imgUV[left[cr_MB_y2 - 1 - i].pos_y][left[cr_MB_y2 - 1 - i].pos_x]); + + ib= ((cr_MB_x == 8 ? 17 : 5) * ih + 2 * cr_MB_x)>>(cr_MB_x == 8 ? 5 : 6); + ic= ((cr_MB_y == 8 ? 17 : 5) * iv + 2 * cr_MB_y)>>(cr_MB_y == 8 ? 5 : 6); + + iaa=16*(imgUV[left[cr_MB_y].pos_y][left[cr_MB_y].pos_x] + upPred[cr_MB_x-1]); + + for (j = 0; j < cr_MB_y; ++j) + for (i = 0; i < cr_MB_x; ++i) + mb_pred[j][i]=(imgpel) iClip1(max_imgpel_value, ((iaa + (i - cr_MB_x2 + 1) * ib + (j - cr_MB_y2 + 1) * ic + 16) >> 5)); +} + +// TODO: benski> replace with PredictIntraChroma8x8_H264 ? +void intrapred_chroma(Macroblock *currMB, int uv) +{ + if (currMB->c_ipred_mode == VERT_PRED_8) + { + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + imgpel **imgUV = dec_picture->imgUV[uv]->img; + + h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[uv + 1]; + + PixelPos up; //!< pixel position p(0,-1) + + int up_avail; + + int cr_MB_x = p_Vid->mb_cr_size_x; + int cr_MB_y = p_Vid->mb_cr_size_y; + + p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + } + else + { + up_avail = up.available ? 
p_Vid->intra_block[up.mb_addr] : 0; + } + + // Vertical Prediction + if (!up_avail) + error("unexpected VERT_PRED_8 chroma intra prediction mode",-1); + + chroma_pred_vert8(cr_MB_x, cr_MB_y, up, imgUV, mb_pred); + } + else + { + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int i; + StorablePicture *dec_picture = p_Vid->dec_picture; + imgpel **imgUV = dec_picture->imgUV[uv]->img; + int max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1]; + + int yuv = dec_picture->chroma_format_idc - 1; + h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[uv + 1]; + + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..16) + + int up_avail, left_avail[2], left_up_avail; + + int cr_MB_x = p_Vid->mb_cr_size_x; + int cr_MB_y = p_Vid->mb_cr_size_y; + int cr_MB_y2 = (cr_MB_y >> 1); + int cr_MB_x2 = (cr_MB_x >> 1); + + p_Vid->getNeighbourNX(currMB, -1, p_Vid->mb_size[IS_CHROMA], &left[0]); + p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[IS_CHROMA], &left[1]); + + p_Vid->getNeighbourNPChromaNB(currMB, 2-1, p_Vid->mb_size[IS_CHROMA], &left[2]); + p_Vid->getNeighbourNPChromaNB(currMB, 3-1, p_Vid->mb_size[IS_CHROMA], &left[3]); + p_Vid->getNeighbourNPChromaNB(currMB, 4-1, p_Vid->mb_size[IS_CHROMA], &left[4]); + p_Vid->getNeighbourNPChromaNB(currMB, 5-1, p_Vid->mb_size[IS_CHROMA], &left[5]); + p_Vid->getNeighbourNPChromaNB(currMB, 6-1, p_Vid->mb_size[IS_CHROMA], &left[6]); + p_Vid->getNeighbourNPChromaNB(currMB, 7-1, p_Vid->mb_size[IS_CHROMA], &left[7]); + p_Vid->getNeighbourNPChromaNB(currMB, 8-1, p_Vid->mb_size[IS_CHROMA], &left[8]); + + if (cr_MB_y == 16) + { + p_Vid->getNeighbourNPChromaNB(currMB, 9-1, p_Vid->mb_size[IS_CHROMA], &left[9]); + p_Vid->getNeighbourNPChromaNB(currMB, 10-1, p_Vid->mb_size[IS_CHROMA], &left[10]); + p_Vid->getNeighbourNPChromaNB(currMB, 11-1, p_Vid->mb_size[IS_CHROMA], &left[11]); + p_Vid->getNeighbourNPChromaNB(currMB, 12-1, p_Vid->mb_size[IS_CHROMA], &left[12]); + 
p_Vid->getNeighbourNPChromaNB(currMB, 13-1, p_Vid->mb_size[IS_CHROMA], &left[13]); + p_Vid->getNeighbourNPChromaNB(currMB, 14-1, p_Vid->mb_size[IS_CHROMA], &left[14]); + p_Vid->getNeighbourNPChromaNB(currMB, 15-1, p_Vid->mb_size[IS_CHROMA], &left[15]); + p_Vid->getNeighbourNPChromaNB(currMB, 16-1, p_Vid->mb_size[IS_CHROMA], &left[16]); + } + + p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + left_avail[0] = left_avail[1] = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0; + for (i=0, left_avail[0] = 1; i < cr_MB_y2;++i) + left_avail[0] &= left[i + 1].available ? p_Vid->intra_block[left[i + 1].mb_addr]: 0; + + for (i = cr_MB_y2, left_avail[1] = 1; i<cr_MB_y;++i) + left_avail[1] &= left[i + 1].available ? p_Vid->intra_block[left[i + 1].mb_addr]: 0; + + left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0; + } + + switch (currMB->c_ipred_mode) + { + case DC_PRED_8: + chroma_dc_pred8(p_Vid, yuv, imgUV, up_avail, left_avail, up, left, mb_pred); + break; + case HOR_PRED_8: + { + // Horizontal Prediction + if (!left_avail[0] || !left_avail[1]) + error("unexpected HOR_PRED_8 chroma intra prediction mode",-1); + + chroma_pred_horiz8(cr_MB_x, cr_MB_y, left, imgUV, mb_pred); + } + break; + case PLANE_8: + // plane prediction + if (!left_up_avail || !left_avail[0] || !left_avail[1] || !up_avail) + error("unexpected PLANE_8 chroma intra prediction mode",-1); + else + { + chroma_pred_plane8(cr_MB_x, cr_MB_y, cr_MB_x2, cr_MB_y2, up, left, max_imgpel_value, imgUV, mb_pred); + } + break; + default: + error("illegal chroma intra prediction mode", 600); + break; + } + } +} + + + + diff --git a/Src/h264dec/ldecod/src/ldecod.c b/Src/h264dec/ldecod/src/ldecod.c new file mode 100644 index 00000000..39919e1e --- /dev/null +++ b/Src/h264dec/ldecod/src/ldecod.c @@ -0,0 +1,639 @@ + 
+/*! + *********************************************************************** + * \mainpage + * This is the H.264/AVC decoder reference software. For detailed documentation + * see the comments in each file. + * + * The JM software web site is located at: + * http://iphome.hhi.de/suehring/tml + * + * For bug reporting and known issues see: + * https://ipbt.hhi.de + * + * \author + * The main contributors are listed in contributors.h + * + * \version + * JM 16.1 (FRExt) + * + * \note + * tags are used for document system "doxygen" + * available at http://www.doxygen.org + */ +/*! + * \file + * ldecod.c + * \brief + * H.264/AVC reference decoder project main() + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langøy <inge.lille-langoy@telenor.com> + * - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se> + * - Stephan Wenger <stewe@cs.tu-berlin.de> + * - Jani Lainema <jani.lainema@nokia.com> + * - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de> + * - Byeong-Moon Jeon <jeonbm@lge.com> + * - Gabi Blaettermann + * - Ye-Kui Wang <wyk@ieee.org> + * - Valeri George <george@hhi.de> + * - Karsten Suehring <suehring@hhi.de> + * + *********************************************************************** + */ + +#include "contributors.h" + +#include <sys/stat.h> + +#include "global.h" +#include "image.h" +#include "memalloc.h" +#include "mc_prediction.h" +#include "mbuffer.h" +#include "leaky_bucket.h" +#include "fmo.h" +#include "output.h" +#include "cabac.h" +#include "parset.h" +#include "sei.h" +#include "erc_api.h" +#include "quant.h" +#include "block.h" +#include "nalu.h" +#include "meminput.h" +#define LOGFILE "log.dec" +#define DATADECFILE "dataDec.txt" +#define TRACEFILE "trace_dec.txt" + +// Decoder definition. This should be the only global variable in the entire +// software. Global variables should be avoided. 
+char errortext[ET_SIZE]; //!< buffer for error message for exit with error() + +#ifdef TRACE +FILE *p_trace=0; +int bitcounter=0; +#endif + +// Prototypes of static functions +void init (VideoParameters *p_Vid); +void malloc_slice(InputParameters *p_Inp, VideoParameters *p_Vid); +void free_slice (Slice *currSlice); + +void init_frext(VideoParameters *p_Vid); + +/*! + ************************************************************************ + * \brief + * Error handling procedure. Print error message to stderr and exit + * with supplied code. + * \param text + * Error message + * \param code + * Exit code + ************************************************************************ + */ +void error(char *text, int code) +{ + RaiseException(code, 0, 1, (ULONG_PTR *)text); + //fprintf(stderr, "%s\n", text); + //flush_dpb(p_Dec->p_Vid); + //exit(code); +} + +/*static */void Configure(VideoParameters *p_Vid, InputParameters *p_Inp) +{ + p_Vid->p_Inp = p_Inp; + + p_Inp->intra_profile_deblocking = 0; + +#ifdef _LEAKYBUCKET_ + p_Inp->R_decoder=500000; //! Decoder rate + p_Inp->B_decoder=104000; //! Decoder buffer size + p_Inp->F_decoder=73000; //! Decoder initial delay + strcpy(p_Inp->LeakyBucketParamFile,"leakybucketparam.cfg"); // file where Leaky Bucket parameters (computed by encoder) are stored +#endif + +} + +/*! 
+ *********************************************************************** + * \brief + * Allocate the Image structure + * \par Output: + * Image Parameters VideoParameters *p_Vid + *********************************************************************** + */ +static void alloc_img( VideoParameters **p_Vid) +{ + if ((*p_Vid = (VideoParameters *) calloc(1, sizeof(VideoParameters)))==NULL) + no_mem_exit("alloc_img: p_Vid"); + + if (((*p_Vid)->old_slice = (OldSliceParams *) calloc(1, sizeof(OldSliceParams)))==NULL) + no_mem_exit("alloc_img: p_Vid->old_slice"); + + if (((*p_Vid)->p_Dpb = (DecodedPictureBuffer*)calloc(1, sizeof(DecodedPictureBuffer)))==NULL) + no_mem_exit("alloc_img: p_Vid->p_Dpb"); + + (*p_Vid)->p_Dpb->init_done = 0; + + (*p_Vid)->global_init_done = 0; + +#if (ENABLE_OUTPUT_TONEMAPPING) + if (((*p_Vid)->seiToneMapping = (ToneMappingSEI*)calloc(1, sizeof(ToneMappingSEI)))==NULL) + no_mem_exit("alloc_img: (*p_Vid)->seiToneMapping"); +#endif + +} + + +/*! + *********************************************************************** + * \brief + * Allocate the Input structure + * \par Output: + * Input Parameters InputParameters *p_Vid + *********************************************************************** + */ +static void alloc_params( InputParameters **p_Inp ) +{ + if ((*p_Inp = (InputParameters *) calloc(1, sizeof(InputParameters)))==NULL) + no_mem_exit("alloc_params: p_Inp"); +} + + /*! + *********************************************************************** + * \brief + * Allocate the Decoder Structure + * \par Output: + * Decoder Parameters + *********************************************************************** + */ +DecoderParams *alloc_decoder() +{ + DecoderParams *decoder = (DecoderParams *) calloc(1, sizeof(DecoderParams)); + if (decoder) + + { + alloc_img(&(decoder->p_Vid)); + alloc_params(&(decoder->p_Inp)); +#ifdef TRACE + p_trace = 0; + bitcounter = 0; +#endif + } + return decoder; +} + +/*! 
+ *********************************************************************** + * \brief + * Free the Image structure + * \par Input: + * Image Parameters VideoParameters *p_Vid + *********************************************************************** + */ +void free_img( VideoParameters *p_Vid) +{ + if (p_Vid != NULL) + { + free_mem_input(p_Vid); +#if (ENABLE_OUTPUT_TONEMAPPING) + if (p_Vid->seiToneMapping != NULL) + { + free (p_Vid->seiToneMapping); + p_Vid->seiToneMapping = NULL; + } +#endif + + if (p_Vid->p_Dpb != NULL) + { + free (p_Vid->p_Dpb); + p_Vid->p_Dpb = NULL; + } + if (p_Vid->old_slice != NULL) + { + free (p_Vid->old_slice); + p_Vid->old_slice = NULL; + } + + free (p_Vid); + p_Vid = NULL; + } +} +/*! + *********************************************************************** + * \brief + * main function for TML decoder + *********************************************************************** + */ +#if 0 +int main(int argc, char **argv) +{ + DecoderParams *p_Dec = alloc_decoder(); + if (!p_Dec) + return 1; + + Configure(p_Dec->p_Vid, p_Dec->p_Inp, argc, argv); + + initBitsFile(p_Dec->p_Vid, p_Dec->p_Inp->FileFormat); + + p_Dec->p_Vid->bitsfile->OpenBitsFile(p_Dec->p_Vid, p_Dec->p_Inp->infile); + + // Allocate Slice data struct + malloc_slice(p_Dec->p_Inp, p_Dec->p_Vid); + init_old_slice(p_Dec->p_Vid->old_slice); + + init(p_Dec->p_Vid); + + init_out_buffer(p_Dec->p_Vid); + + while (decode_one_frame(p_Dec->p_Vid) != EOS) + ; + + free_slice(p_Dec->p_Vid->currentSlice); + FmoFinit(p_Dec->p_Vid); + + free_global_buffers(p_Dec->p_Vid); + flush_dpb(p_Dec->p_Vid); + +#if (PAIR_FIELDS_IN_OUTPUT) + flush_pending_output(p_Dec->p_Vid, p_Dec->p_Vid->p_out); +#endif + + p_Dec->p_Vid->bitsfile->CloseBitsFile(p_Dec->p_Vid); + + close(p_Dec->p_Vid->p_out); + + if (p_Dec->p_Vid->p_ref != -1) + close(p_Dec->p_Vid->p_ref); + +#if TRACE + fclose(p_trace); +#endif + + ercClose(p_Dec->p_Vid, p_Dec->p_Vid->erc_errorVar); + + CleanUpPPS(p_Dec->p_Vid); + free_dpb(p_Dec->p_Vid); + 
uninit_out_buffer(p_Dec->p_Vid); + + free (p_Dec->p_Inp); + free_img (p_Dec->p_Vid); + free(p_Dec); + + return 0; +} +#endif + +/*! + *********************************************************************** + * \brief + * Initilize some arrays + *********************************************************************** + */ +void init(VideoParameters *p_Vid) //!< image parameters +{ + int i; + InputParameters *p_Inp = p_Vid->p_Inp; + p_Vid->oldFrameSizeInMbs = -1; + + p_Vid->recovery_point = 0; + p_Vid->recovery_point_found = 0; + p_Vid->recovery_poc = 0x7fffffff; /* set to a max value */ + + p_Vid->number = 0; + p_Vid->type = I_SLICE; + + p_Vid->dec_ref_pic_marking_buffer = NULL; + + p_Vid->dec_picture = NULL; + // reference flag initialization + for(i=0;i<17;++i) + { + p_Vid->ref_flag[i] = 1; + } + + p_Vid->MbToSliceGroupMap = NULL; + p_Vid->MapUnitToSliceGroupMap = NULL; + + p_Vid->LastAccessUnitExists = 0; + p_Vid->NALUCount = 0; + + + p_Vid->out_buffer = NULL; + p_Vid->pending_output = NULL; + p_Vid->pending_output_state = FRAME; + p_Vid->recovery_flag = 0; + + +#if (ENABLE_OUTPUT_TONEMAPPING) + init_tone_mapping_sei(p_Vid->seiToneMapping); +#endif + +} + +/*! 
+ *********************************************************************** + * \brief + * Initialize FREXT variables + *********************************************************************** + */ +void init_frext(VideoParameters *p_Vid) //!< image parameters +{ + //pel bitdepth init + p_Vid->bitdepth_luma_qp_scale = 6 * (p_Vid->bitdepth_luma - 8); + + p_Vid->dc_pred_value_comp[0] = 1<<(p_Vid->bitdepth_luma - 1); + p_Vid->max_pel_value_comp[0] = (1<<p_Vid->bitdepth_luma) - 1; + p_Vid->mb_size[IS_LUMA][0] = p_Vid->mb_size[IS_LUMA][1] = MB_BLOCK_SIZE; + + if (p_Vid->active_sps->chroma_format_idc != YUV400) + { + //for chrominance part + p_Vid->bitdepth_chroma_qp_scale = 6 * (p_Vid->bitdepth_chroma - 8); + p_Vid->dc_pred_value_comp[1] = (1 << (p_Vid->bitdepth_chroma - 1)); + p_Vid->dc_pred_value_comp[2] = p_Vid->dc_pred_value_comp[1]; + p_Vid->max_pel_value_comp[1] = (1 << p_Vid->bitdepth_chroma) - 1; + p_Vid->max_pel_value_comp[2] = (1 << p_Vid->bitdepth_chroma) - 1; + p_Vid->num_blk8x8_uv = (1 << p_Vid->active_sps->chroma_format_idc) & (~(0x1)); + p_Vid->num_uv_blocks = (p_Vid->num_blk8x8_uv >> 1); + p_Vid->num_cdc_coeff = (p_Vid->num_blk8x8_uv << 1); + p_Vid->mb_size[IS_CHROMA][0] = p_Vid->mb_size[2][0] = p_Vid->mb_cr_size_x = (p_Vid->active_sps->chroma_format_idc==YUV420 || p_Vid->active_sps->chroma_format_idc==YUV422)? 8 : 16; + p_Vid->mb_size[IS_CHROMA][1] = p_Vid->mb_size[2][1] = p_Vid->mb_cr_size_y = (p_Vid->active_sps->chroma_format_idc==YUV444 || p_Vid->active_sps->chroma_format_idc==YUV422)? 16 : 8; + + p_Vid->subpel_x = p_Vid->mb_cr_size_x == 8 ? 7 : 3; + p_Vid->subpel_y = p_Vid->mb_cr_size_y == 8 ? 7 : 3; + p_Vid->shiftpel_x = p_Vid->mb_cr_size_x == 8 ? 3 : 2; + p_Vid->shiftpel_y = p_Vid->mb_cr_size_y == 8 ? 
3 : 2; + } + else + { + p_Vid->bitdepth_chroma_qp_scale = 0; + p_Vid->max_pel_value_comp[1] = 0; + p_Vid->max_pel_value_comp[2] = 0; + p_Vid->num_blk8x8_uv = 0; + p_Vid->num_uv_blocks = 0; + p_Vid->num_cdc_coeff = 0; + p_Vid->mb_size[IS_CHROMA][0] = p_Vid->mb_size[2][0] = p_Vid->mb_cr_size_x = 0; + p_Vid->mb_size[IS_CHROMA][1] = p_Vid->mb_size[2][1] = p_Vid->mb_cr_size_y = 0; + + p_Vid->subpel_x = 0; + p_Vid->subpel_y = 0; + p_Vid->shiftpel_x = 0; + p_Vid->shiftpel_y = 0; + } + p_Vid->mb_size_blk[0][0] = p_Vid->mb_size_blk[0][1] = p_Vid->mb_size[0][0] >> 2; + p_Vid->mb_size_blk[1][0] = p_Vid->mb_size_blk[2][0] = p_Vid->mb_size[1][0] >> 2; + p_Vid->mb_size_blk[1][1] = p_Vid->mb_size_blk[2][1] = p_Vid->mb_size[1][1] >> 2; + + p_Vid->mb_size_shift[0][0] = p_Vid->mb_size_shift[0][1] = CeilLog2_sf (p_Vid->mb_size[0][0]); + p_Vid->mb_size_shift[1][0] = p_Vid->mb_size_shift[2][0] = CeilLog2_sf (p_Vid->mb_size[1][0]); + p_Vid->mb_size_shift[1][1] = p_Vid->mb_size_shift[2][1] = CeilLog2_sf (p_Vid->mb_size[1][1]); +} + +/*! + ************************************************************************ + * \brief + * Allocates a stand-alone partition structure. 
Structure should + * be freed by FreePartition(); + * data structures + * + * \par Input: + * n: number of partitions in the array + * \par return + * pointer to DataPartition Structure, zero-initialized + ************************************************************************ + */ + +DataPartition *AllocPartition(int n) +{ + DataPartition *partArr, *dataPart; + int i; + + partArr = (DataPartition *) calloc(n, sizeof(DataPartition)); + if (partArr == NULL) + { + snprintf(errortext, ET_SIZE, "AllocPartition: Memory allocation for Data Partition failed"); + error(errortext, 100); + } + + for (i=0; i<n; ++i) // loop over all data partitions + { + dataPart = &(partArr[i]); + dataPart->bitstream = (Bitstream *) calloc(1, sizeof(Bitstream)); + if (dataPart->bitstream == NULL) + { + snprintf(errortext, ET_SIZE, "AllocPartition: Memory allocation for Bitstream failed"); + error(errortext, 100); + } + dataPart->bitstream->streamBuffer = 0; + } + return partArr; +} + + + + +/*! + ************************************************************************ + * \brief + * Frees a partition structure (array). + * + * \par Input: + * Partition to be freed, size of partition Array (Number of Partitions) + * + * \par return + * None + * + * \note + * n must be the same as for the corresponding call of AllocPartition + ************************************************************************ + */ + + +void FreePartition (DataPartition *dp, int n) +{ + int i; + + assert (dp != NULL); + assert (dp->bitstream != NULL); + //assert (dp->bitstream->streamBuffer != NULL); + for (i=0; i<n; ++i) + { + //free (dp[i].bitstream->streamBuffer); + free (dp[i].bitstream); + } + free (dp); +} + + +/*! 
+ ************************************************************************ + * \brief + * Allocates the slice structure along with its dependent + * data structures + * + * \par Input: + * Input Parameters InputParameters *p_Inp, VideoParameters *p_Vid + ************************************************************************ + */ +void malloc_slice(InputParameters *p_Inp, VideoParameters *p_Vid) +{ + int memory_size = 0; + Slice *currSlice; + + p_Vid->currentSlice = (Slice *) _aligned_malloc(sizeof(Slice), 32); + if ( (currSlice = p_Vid->currentSlice) == NULL) + { + error("Memory allocation for Slice datastruct failed",100); + } + memset(p_Vid->currentSlice, 0, sizeof(Slice)); + // p_Vid->currentSlice->rmpni_buffer=NULL; + //! you don't know whether we do CABAC here, hence initialize CABAC anyway + // if (p_Inp->symbol_mode == CABAC) + + // create all context models + currSlice->mot_ctx = create_contexts_MotionInfo(); + currSlice->tex_ctx = create_contexts_TextureInfo(); + + + currSlice->max_part_nr = 3; //! assume data partitioning (worst case) for the following mallocs() + currSlice->partArr = AllocPartition(currSlice->max_part_nr); + currSlice->p_colocated = NULL; + + currSlice->coeff_ctr = -1; + currSlice->pos = 0; +} + + +/*! + ************************************************************************ + * \brief + * Memory frees of the Slice structure and of its dependent + * data structures + * + * \par Input: + * Input Parameters InputParameters *p_Inp, VideoParameters *p_Vid + ************************************************************************ + */ +void free_slice(Slice *currSlice) +{ + FreePartition (currSlice->partArr, 3); + + if (1) + { + // delete all context models + delete_contexts_MotionInfo(currSlice->mot_ctx); + delete_contexts_TextureInfo(currSlice->tex_ctx); + } + _aligned_free(currSlice); + + currSlice = NULL; +} + +/*! 
+ ************************************************************************ + * \brief + * Dynamic memory allocation of frame size related global buffers + * buffers are defined in global.h, allocated memory must be freed in + * void free_global_buffers() + * + * \par Input: + * Input Parameters InputParameters *p_Inp, Image Parameters VideoParameters *p_Vid + * + * \par Output: + * Number of allocated bytes + *********************************************************************** + */ +int init_global_buffers(VideoParameters *p_Vid) +{ + int memory_size=0; + int i; + + if (p_Vid->global_init_done) + { + free_global_buffers(p_Vid); + } + + // allocate memory in structure p_Vid + if( IS_INDEPENDENT(p_Vid) ) + { + for( i=0; i<MAX_PLANE; ++i ) + { + if(((p_Vid->mb_data_JV[i]) = (Macroblock *) calloc(p_Vid->FrameSizeInMbs, sizeof(Macroblock))) == NULL) + no_mem_exit("init_global_buffers: p_Vid->mb_data"); + } + p_Vid->mb_data = NULL; + } + else + { + if(((p_Vid->mb_data) = (Macroblock *) calloc(p_Vid->FrameSizeInMbs, sizeof(Macroblock))) == NULL) + no_mem_exit("init_global_buffers: p_Vid->mb_data"); + } + + if(((p_Vid->intra_block) = (int*)calloc(p_Vid->FrameSizeInMbs, sizeof(int))) == NULL) + no_mem_exit("init_global_buffers: p_Vid->intra_block"); + + p_Vid->PicPos = (h264_pic_position *)calloc(p_Vid->FrameSizeInMbs + 1, sizeof(h264_pic_position)); //! Helper array to access macroblock positions. We add 1 to also consider last MB. 
+ + for (i = 0; i < (int) p_Vid->FrameSizeInMbs + 1;++i) + { + p_Vid->PicPos[i][0] = (i % p_Vid->PicWidthInMbs); + p_Vid->PicPos[i][1] = (i / p_Vid->PicWidthInMbs); + } + + memory_size += get_mem2D(&(p_Vid->ipredmode), 4*p_Vid->FrameHeightInMbs, 4*p_Vid->PicWidthInMbs); + + // CAVLC mem + p_Vid->nz_coeff = (h264_nz_coefficient *)_aligned_malloc(p_Vid->FrameSizeInMbs*sizeof(h264_nz_coefficient), 32); + memset(p_Vid->nz_coeff, 0, p_Vid->FrameSizeInMbs*sizeof(h264_nz_coefficient)); + //memory_size += get_mem4D(&(p_Vid->nz_coeff), p_Vid->FrameSizeInMbs, 3, BLOCK_SIZE, BLOCK_SIZE); + + memory_size += get_mem2Dint(&(p_Vid->siblock), p_Vid->FrameHeightInMbs, p_Vid->PicWidthInMbs); + + init_qp_process(p_Vid); + + p_Vid->global_init_done = 1; + + p_Vid->oldFrameSizeInMbs = p_Vid->FrameSizeInMbs; + + return (memory_size); +} + +/*! + ************************************************************************ + * \brief + * Free allocated memory of frame size related global buffers + * buffers are defined in global.h, allocated memory is allocated in + * int init_global_buffers() + * + * \par Input: + * Input Parameters InputParameters *p_Inp, Image Parameters VideoParameters *p_Vid + * + * \par Output: + * none + * + ************************************************************************ + */ +void free_global_buffers(VideoParameters *p_Vid) +{ + // CAVLC free mem + _aligned_free(p_Vid->nz_coeff); + + free_mem2Dint(p_Vid->siblock); + + // free mem, allocated for structure p_Vid + if (p_Vid->mb_data != NULL) + free(p_Vid->mb_data); + + free(p_Vid->PicPos); + + free (p_Vid->intra_block); + free_mem2D(p_Vid->ipredmode); + + free_qp_matrices(p_Vid); + + p_Vid->global_init_done = 0; + +} diff --git a/Src/h264dec/ldecod/src/loopFilter.c b/Src/h264dec/ldecod/src/loopFilter.c new file mode 100644 index 00000000..c1c19a2f --- /dev/null +++ b/Src/h264dec/ldecod/src/loopFilter.c @@ -0,0 +1,1338 @@ + +/*! 
+************************************************************************************* +* \file loopFilter.c +* +* \brief +* Filter to reduce blocking artifacts on a macroblock level. +* The filter strength is QP dependent. +* +* \author +* Contributors: +* - Peter List Peter.List@t-systems.de: Original code (13-Aug-2001) +* - Jani Lainema Jani.Lainema@nokia.com: Some bug fixing, removal of recursiveness (16-Aug-2001) +* - Peter List Peter.List@t-systems.de: inplace filtering and various simplifications (10-Jan-2002) +* - Anthony Joch anthony@ubvideo.com: Simplified switching between filters and +* non-recursive default filter. (08-Jul-2002) +* - Cristina Gomila cristina.gomila@thomson.net: Simplification of the chroma deblocking +* from JVT-E089 (21-Nov-2002) +* - Alexis Michael Tourapis atour@dolby.com: Speed/Architecture improvements (08-Feb-2007) +************************************************************************************* +*/ + +#include "global.h" +#include "image.h" +#include "mb_access.h" +#include "loopfilter.h" + +/*********************************************************************************************************/ + +// NOTE: In principle, the alpha and beta tables are calculated with the formulas below +// Alpha( qp ) = 0.8 * (2^(qp/6) - 1) +// Beta ( qp ) = 0.5 * qp - 7 + +// The tables actually used have been "hand optimized" though (by Anthony Joch). So, the +// table values might be a little different to formula-generated values. 
Also, the first +// few values of both tables is set to zero to force the filter off at low qp’s + +static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; +static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; +static const byte CLIP_TAB[52][5] = +{ + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1}, + { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3}, + { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6}, + { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16}, + { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25} +} ; + +static const char chroma_edge[2][4][4] = //[dir][edge][yuv_format] +{ { {-4, 0, 0, 0}, +{-4,-4,-4, 4}, +{-4, 4, 4, 8}, +{-4,-4,-4, 12}}, + +{ {-4, 0, 0, 0}, +{-4,-4, 4, 4}, +{-4, 4, 8, 8}, +{-4,-4, 12, 12}}}; + +static const int pelnum_cr[2][4] = {{0,8,16,16}, {0,8, 8,16}}; //[dir:0=vert, 1=hor.][yuv_format] + +/* YUV420 & non-aff optimized functions */ +void EdgeLoopLuma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, PixelPos pixMB, Macroblock *MbP); +void EdgeLoopLuma_Horiz_YUV420(VideoImage *image, const byte Strength[MB_BLOCK_SIZE], 
Macroblock *MbQ, PixelPos pixMB, Macroblock *MbP); +void EdgeLoopLumaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p); +void EdgeLoopChroma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, int uv, PixelPos pixMB, Macroblock *MbP); +void EdgeLoopChroma_Horiz_YUV420(VideoImage *image, const byte Strength[4], Macroblock *MbQ, int uv, PixelPos pixMB, Macroblock *MbP); +void EdgeLoopChromaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p); +void GetStrength_Vert_YUV420(uint8_t Strength[4], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP); +void GetStrength_Vert_YUV420_All(uint8_t Strength[4][4], Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag); +void GetStrength_Horiz_YUV420(uint8_t Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP); +void GetStrength_Horiz_YUV420_All(uint8_t Strength[4][4], Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag); +void GetStrength_MBAff_Vert_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p); +void GetStrengthMBAff_Horiz_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p); +static void Deblock_YUV420(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr); +static void Deblock_YUV420_MBAFF(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr); +/* */ +void EdgeLoopChromaNormal_Vert(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p); +void EdgeLoopLumaNormal_Vert(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p); +void GetStrengthNormal_Vert(byte 
Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p); +void GetStrengthNormal_Horiz(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p); +static void GetStrengthNormal (byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int dir,int edge, int mvlimit,StorablePicture *p); +static void GetStrengthMBAff (byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int dir,int edge, int mvlimit,StorablePicture *p); +static void EdgeLoopLumaNormal(ColorPlane pl, struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, StorablePicture *p); +static void EdgeLoopLumaMBAff (ColorPlane pl, struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, StorablePicture *p); +static void EdgeLoopChromaNormal(struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p); +static void EdgeLoopChromaMBAff(struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p); +static void DeblockMb(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr); +static void EdgeLoopLumaMBAff_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p); +static void EdgeLoopLumaMBAff_Vert(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p); + +/*! +***************************************************************************************** +* \brief +* Filter all macroblocks in order of increasing macroblock address. 
+***************************************************************************************** +*/ +void DeblockPicture(VideoParameters *p_Vid, StorablePicture *p) +{ + unsigned i; + + if (!p->mb_aff_frame_flag && p_Vid->active_sps->chroma_format_idc==YUV420 && p_Vid->getNeighbour == getNonAffNeighbour && !p_Vid->mixedModeEdgeFlag) + { + for (i = 0; i < p->PicSizeInMbs; ++i) + { + Deblock_YUV420( p_Vid, p, i ) ; + } + } + else if (p->mb_aff_frame_flag && p_Vid->active_sps->chroma_format_idc==YUV420 && p_Vid->getNeighbour == getAffNeighbour) + { + for (i = 0; i < p->PicSizeInMbs; ++i) + { + Deblock_YUV420_MBAFF( p_Vid, p, i ) ; + } + } + else + { + if (p->mb_aff_frame_flag == 1) + { + p_Vid->GetStrength = GetStrengthMBAff; + p_Vid->EdgeLoopLuma = EdgeLoopLumaMBAff; + p_Vid->EdgeLoopChroma = EdgeLoopChromaMBAff; + } + else + { + p_Vid->GetStrength = GetStrengthNormal; + p_Vid->EdgeLoopLuma = EdgeLoopLumaNormal; + p_Vid->EdgeLoopChroma = EdgeLoopChromaNormal; + } + + for (i = 0; i < p->PicSizeInMbs; ++i) + { + DeblockMb( p_Vid, p, i ) ; + } + } +} + + +/*! +***************************************************************************************** +* \brief +* Deblocking filter for one macroblock. 
+***************************************************************************************** +*/ + +static void DeblockMb(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr) +{ + int EdgeCondition; + int dir, edge; + __declspec(align(32)) byte Strength[16]; + short mb_x, mb_y; + + int filterNon8x8LumaEdgesFlag[4] = {1,1,1,1}; + int filterLeftMbEdgeFlag; + int filterTopMbEdgeFlag; + int fieldModeMbFlag; + int mvlimit = 4; + int i, StrengthSum; + Macroblock *MbQ = &(p_Vid->mb_data[MbQAddr]) ; // current Mb + VideoImage *imgY = p->imgY; + VideoImage *imgUV[2] = {p->imgUV[0], p->imgUV[1]}; + + int edge_cr; + + // return, if filter is disabled + if (MbQ->DFDisableIdc==1) + { + p_Vid->DeblockCall = 0; + return; + } + p_Vid->DeblockCall = 1; + get_mb_pos (p_Vid, MbQAddr, p_Vid->mb_size[IS_LUMA], &mb_x, &mb_y); + + filterLeftMbEdgeFlag = (mb_x != 0); + filterTopMbEdgeFlag = (mb_y != 0); + + if (MbQ->mb_type == I8MB) + assert(MbQ->luma_transform_size_8x8_flag); + + filterNon8x8LumaEdgesFlag[1] = + filterNon8x8LumaEdgesFlag[3] = !(MbQ->luma_transform_size_8x8_flag); + + if (p->mb_aff_frame_flag && mb_y == MB_BLOCK_SIZE && MbQ->mb_field) + filterTopMbEdgeFlag = 0; + + fieldModeMbFlag = (p->structure!=FRAME) || (p->mb_aff_frame_flag && MbQ->mb_field); + if (fieldModeMbFlag) + mvlimit = 2; + + if (MbQ->DFDisableIdc==2) + { + // don't filter at slice boundaries + filterLeftMbEdgeFlag = MbQ->mb_avail_left; + // if this the bottom of a frame macroblock pair then always filter the top edge + filterTopMbEdgeFlag = (p->mb_aff_frame_flag && !MbQ->mb_field && (MbQAddr & 0x01)) ? 
1 : MbQ->mb_avail_up; + } + + CheckAvailabilityOfNeighbors(MbQ); + + for( dir = 0 ; dir < 2 ; ++dir ) // filter first vertical edges, followed by horizontal + { + EdgeCondition = (dir && filterTopMbEdgeFlag) || (!dir && filterLeftMbEdgeFlag); // can not filter beyond picture boundaries + for( edge=0; edge<4 ; ++edge ) // first 4 vertical strips of 16 pel + { // then 4 horizontal + if( edge || EdgeCondition ) + { + edge_cr = chroma_edge[dir][edge][p->chroma_format_idc]; + + p_Vid->GetStrength(Strength, MbQ, dir, edge << 2, mvlimit, p); // Strength for 4 blks in 1 stripe + StrengthSum = Strength[0]; + for (i = 1; i < MB_BLOCK_SIZE && StrengthSum == 0 ; ++i) + { + StrengthSum += (int) Strength[i]; + } + + if( StrengthSum ) // only if one of the 16 Strength bytes is != 0 + { + if (filterNon8x8LumaEdgesFlag[edge]) + { + p_Vid->EdgeLoopLuma( PLANE_Y, imgY, Strength, MbQ, dir, edge << 2, p) ; + if( p_Vid->active_sps->chroma_format_idc==YUV444 && !IS_INDEPENDENT(p_Vid) ) + { + p_Vid->EdgeLoopLuma(PLANE_U, imgUV[0], Strength, MbQ, dir, edge << 2, p); + p_Vid->EdgeLoopLuma(PLANE_V, imgUV[1], Strength, MbQ, dir, edge << 2, p); + } + } + if (p_Vid->active_sps->chroma_format_idc==YUV420 || p_Vid->active_sps->chroma_format_idc==YUV422) + { + if( (imgUV != NULL) && (edge_cr >= 0)) + { + p_Vid->EdgeLoopChroma( imgUV[0], Strength, MbQ, dir, edge_cr, 0, p); + p_Vid->EdgeLoopChroma( imgUV[1], Strength, MbQ, dir, edge_cr, 1, p); + } + } + } + + if (dir && !edge && !MbQ->mb_field && p_Vid->mixedModeEdgeFlag) + { + // this is the extra horizontal edge between a frame macroblock pair and a field above it + p_Vid->DeblockCall = 2; + p_Vid->GetStrength(Strength, MbQ, 1, MB_BLOCK_SIZE, mvlimit, p); // Strength for 4 blks in 1 stripe + //if( *((int*)Strength) ) // only if one of the 4 Strength bytes is != 0 + { + if (filterNon8x8LumaEdgesFlag[edge]) + { + p_Vid->EdgeLoopLuma(PLANE_Y, imgY, Strength, MbQ, dir, MB_BLOCK_SIZE, p) ; + if( p_Vid->active_sps->chroma_format_idc==YUV444 && 
!IS_INDEPENDENT(p_Vid) ) + { + p_Vid->EdgeLoopLuma(PLANE_U, imgUV[0], Strength, MbQ, dir, MB_BLOCK_SIZE, p) ; + p_Vid->EdgeLoopLuma(PLANE_V, imgUV[1], Strength, MbQ, dir, MB_BLOCK_SIZE, p) ; + } + } + if (p_Vid->active_sps->chroma_format_idc==YUV420 || p_Vid->active_sps->chroma_format_idc==YUV422) + { + if( (imgUV != NULL) && (edge_cr >= 0)) + { + p_Vid->EdgeLoopChroma( imgUV[0], Strength, MbQ, dir, MB_BLOCK_SIZE, 0, p) ; + p_Vid->EdgeLoopChroma( imgUV[1], Strength, MbQ, dir, MB_BLOCK_SIZE, 1, p) ; + } + } + } + p_Vid->DeblockCall = 1; + } + } + }//end edge + }//end loop dir + + p_Vid->DeblockCall = 0; +} + + + +static void Deblock_YUV420_MBAFF(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr) +{ + int EdgeCondition; + int edge; + __declspec(align(32)) byte Strength[16]; + short mb_x, mb_y; + + int filterNon8x8LumaEdgesFlag[4] = {1,1,1,1}; + int filterLeftMbEdgeFlag; + int filterTopMbEdgeFlag; + int fieldModeMbFlag; + int mvlimit = 4; + int i, StrengthSum; + Macroblock *MbQ = &(p_Vid->mb_data[MbQAddr]) ; // current Mb + VideoImage *imgY = p->imgY; + VideoImage *imgUV[2] = {p->imgUV[0], p->imgUV[1]}; + + int edge_cr; + + // return, if filter is disabled + if (MbQ->DFDisableIdc==1) + { + p_Vid->DeblockCall = 0; + return; + } + p_Vid->DeblockCall = 1; + get_mb_block_pos_mbaff(p_Vid->PicPos, MbQAddr, &mb_x, &mb_y); + + filterLeftMbEdgeFlag = (mb_x != 0); + filterTopMbEdgeFlag = (mb_y != 0); + + if (MbQ->mb_type == I8MB) + assert(MbQ->luma_transform_size_8x8_flag); + + filterNon8x8LumaEdgesFlag[1] = + filterNon8x8LumaEdgesFlag[3] = !(MbQ->luma_transform_size_8x8_flag); + + if (1 && mb_y == 1 && MbQ->mb_field) + filterTopMbEdgeFlag = 0; + + fieldModeMbFlag = (p->structure!=FRAME) || MbQ->mb_field; + if (fieldModeMbFlag) + mvlimit = 2; + + if (MbQ->DFDisableIdc==2) + { + // don't filter at slice boundaries + filterLeftMbEdgeFlag = MbQ->mb_avail_left; + // if this the bottom of a frame macroblock pair then always filter the top edge + filterTopMbEdgeFlag = (1 && 
!MbQ->mb_field && (MbQAddr & 0x01)) ? 1 : MbQ->mb_avail_up; + } + + CheckAvailabilityOfNeighbors(MbQ); + + + EdgeCondition = filterLeftMbEdgeFlag; // can not filter beyond picture boundaries + for( edge=0; edge<4 ; ++edge ) // first 4 vertical strips of 16 pel + { // then 4 horizontal + if( edge || EdgeCondition ) + { + edge_cr = chroma_edge[0][edge][YUV420]; + + GetStrength_MBAff_Vert_YUV420(Strength, MbQ, edge << 2, mvlimit, p); // Strength for 4 blks in 1 stripe + StrengthSum = Strength[0]; + for (i = 1; i < MB_BLOCK_SIZE && StrengthSum == 0 ; ++i) + { + StrengthSum += (int) Strength[i]; + } + + if( StrengthSum ) // only if one of the 16 Strength bytes is != 0 + { + if (filterNon8x8LumaEdgesFlag[edge]) + { + EdgeLoopLumaMBAff_Vert_YUV420(imgY, Strength, MbQ, edge << 2, p) ; + } + if( (imgUV != NULL) && (edge_cr >= 0)) + { + EdgeLoopChromaMBAff_Vert_YUV420( imgUV[0], Strength, MbQ, edge_cr, 0, p); + EdgeLoopChromaMBAff_Vert_YUV420( imgUV[1], Strength, MbQ, edge_cr, 1, p); + } + } + } + }//end edge + + EdgeCondition = filterTopMbEdgeFlag; // can not filter beyond picture boundaries + for( edge=0; edge<4 ; ++edge ) // first 4 vertical strips of 16 pel + { // then 4 horizontal + if( edge || EdgeCondition ) + { + edge_cr = chroma_edge[1][edge][YUV420]; + + GetStrengthMBAff_Horiz_YUV420(Strength, MbQ, edge << 2, mvlimit, p); // Strength for 4 blks in 1 stripe + StrengthSum = Strength[0]; + for (i = 1; i < MB_BLOCK_SIZE && StrengthSum == 0 ; ++i) + { + StrengthSum += (int) Strength[i]; + } + + if( StrengthSum ) // only if one of the 16 Strength bytes is != 0 + { + if (filterNon8x8LumaEdgesFlag[edge]) + { + EdgeLoopLumaMBAff_Horiz( PLANE_Y, imgY, Strength, MbQ, edge << 2, p) ; + } + if( (imgUV != NULL) && (edge_cr >= 0)) + { + EdgeLoopChromaMBAff( imgUV[0], Strength, MbQ, 1, edge_cr, 0, p); + EdgeLoopChromaMBAff( imgUV[1], Strength, MbQ, 1, edge_cr, 1, p); + } + } + if (!edge && !MbQ->mb_field && p_Vid->mixedModeEdgeFlag) + { + // this is the extra horizontal edge 
between a frame macroblock pair and a field above it + p_Vid->DeblockCall = 2; + GetStrengthMBAff(Strength, MbQ, 1, MB_BLOCK_SIZE, mvlimit, p); // Strength for 4 blks in 1 stripe + //if( *((int*)Strength) ) // only if one of the 4 Strength bytes is != 0 + { + EdgeLoopLumaMBAff_Horiz(PLANE_Y, imgY, Strength, MbQ, MB_BLOCK_SIZE, p) ; + + EdgeLoopChromaMBAff( imgUV[0], Strength, MbQ, 1, MB_BLOCK_SIZE, 0, p) ; + EdgeLoopChromaMBAff( imgUV[1], Strength, MbQ, 1, MB_BLOCK_SIZE, 1, p) ; + + } + p_Vid->DeblockCall = 1; + } + } + }//end edge + + + p_Vid->DeblockCall = 0; +} + + + +static void Deblock_YUV420(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr) +{ + __declspec(align(32)) union + { + uint32_t as32[4]; + uint8_t as8[16]; + uint8_t edge[4][4]; + } strength; + uint8_t alphas[2], alphas_chroma[2][2]; + uint8_t betas[2], betas_chroma[2][2]; + __declspec(align(32)) union + { + uint32_t as32[4]; + uint8_t as8[16]; + } thresholds; + short mb_x, mb_y; + int filterLeftMbEdgeFlag; + int filterTopMbEdgeFlag; + int mvlimit = 4; + Macroblock *MbQ = &(p_Vid->mb_data[MbQAddr]) ; // current Mb + Macroblock *MbP=0; + VideoImage *imgY = p->imgY; + imgpel *YQ, *UQ, *VQ; + VideoImage *imgUV[2] = {p->imgUV[0], p->imgUV[1]}; + int QPQ = MbQ->qp; + int indexAQ = iClip3(0, MAX_QP, QPQ + MbQ->DFAlphaC0Offset); + int indexBQ = iClip3(0, MAX_QP, QPQ + MbQ->DFBetaOffset); + const byte *ClipTabQ = CLIP_TAB[indexAQ], *ClipTabQ_Chroma[2], *ClipTabP_Chroma[2]; + + // return, if filter is disabled + if (MbQ->DFDisableIdc==1) + { + p_Vid->DeblockCall = 0; + return; + } + + alphas[1] = ALPHA_TABLE[indexAQ]; + betas[1] = BETA_TABLE [indexBQ]; + + indexAQ = iClip3(0, MAX_QP, MbQ->qpc[0] + MbQ->DFAlphaC0Offset); + alphas_chroma[0][1] = ALPHA_TABLE[indexAQ]; + ClipTabQ_Chroma[0] = CLIP_TAB [indexAQ]; + indexAQ = iClip3(0, MAX_QP, MbQ->qpc[1] + MbQ->DFAlphaC0Offset); + alphas_chroma[1][1] = ALPHA_TABLE[indexAQ]; + ClipTabQ_Chroma[1] = CLIP_TAB [indexAQ]; + + indexBQ = iClip3(0, MAX_QP, MbQ->qpc[0] 
+ MbQ->DFBetaOffset); + betas_chroma[0][1] = BETA_TABLE[indexBQ]; + indexBQ = iClip3(0, MAX_QP, MbQ->qpc[1] + MbQ->DFBetaOffset); + betas_chroma[1][1] = BETA_TABLE[indexBQ]; + + p_Vid->DeblockCall = 1; + get_mb_block_pos_normal(p_Vid->PicPos, MbQAddr, &mb_x, &mb_y); + + filterLeftMbEdgeFlag = (mb_x != 0); + filterTopMbEdgeFlag = (mb_y != 0); + YQ = imgY->base_address + mb_y*16 * imgY->stride + mb_x*16; + UQ = imgUV[0]->base_address + mb_y * 8 * imgUV[0]->stride + mb_x * 8; + VQ = imgUV[1]->base_address + mb_y * 8 * imgUV[1]->stride + mb_x * 8; + + if (p->structure!=FRAME) + mvlimit = 2; + + if (MbQ->DFDisableIdc==2) + { + // don't filter at slice boundaries + filterLeftMbEdgeFlag = MbQ->mb_avail_left; + // if this the bottom of a frame macroblock pair then always filter the top edge + filterTopMbEdgeFlag = MbQ->mb_avail_up; + } + + //CheckAvailabilityOfNeighbors(MbQ); + +#pragma region vertical + if(filterLeftMbEdgeFlag) // can not filter beyond picture boundaries + { + MbP = &(p_Vid->mb_data[MbQ->mb_addr_left]); + } + else + MbP=0; + + GetStrength_Vert_YUV420_All(strength.edge, MbQ, mvlimit, p, mb_x*4, mb_y*4, MbP, MbQ->luma_transform_size_8x8_flag); + + { + int i; + if (MbP) + { + int QP_Chroma0 = (MbP->qpc[0] + MbQ->qpc[0] + 1) >> 1; + int QP_Chroma1 = (MbP->qpc[1] + MbQ->qpc[1] + 1) >> 1; + int QP = (MbP->qp + QPQ + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset); + int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset); + const byte *ClipTab = CLIP_TAB[indexA]; + + alphas[0] = ALPHA_TABLE[indexA]; + betas[0] = BETA_TABLE [indexB]; + for (i=0;i<4;i++) + { + thresholds.as8[i] = ClipTab[strength.as8[i]]; + } + + indexA = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFAlphaC0Offset); + alphas_chroma[0][0] = ALPHA_TABLE[indexA]; + ClipTabP_Chroma[0] = CLIP_TAB[indexA]; + indexB = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFBetaOffset); + betas_chroma[0][0] =BETA_TABLE[indexB]; + + indexA = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFAlphaC0Offset); + 
alphas_chroma[1][0] = ALPHA_TABLE[indexA]; + ClipTabP_Chroma[1] = CLIP_TAB[indexA]; + indexB = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFBetaOffset); + betas_chroma[1][0] = BETA_TABLE[indexB]; + } + + for (i=4;i<16;i++) + { + thresholds.as8[i] = ClipTabQ[strength.as8[i]]; + } + + ippiFilterDeblockingLuma_VerEdge_H264_8u_C1IR(YQ, imgY->stride, alphas, betas, thresholds.as8, strength.as8); + + if (MbP) + { + for (i=0;i<4;i++) + { + thresholds.as8[i] = ClipTabP_Chroma[0][strength.as8[i]]; + } + } + for (i=4;i<8;i++) + { + thresholds.as8[i] = ClipTabQ_Chroma[0][strength.as8[i+4]]; + } + ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR(UQ, imgUV[0]->stride, alphas_chroma[0], betas_chroma[0], thresholds.as8, strength.as8); + + if (MbP) + { + for (i=0;i<4;i++) + { + thresholds.as8[i] = ClipTabP_Chroma[1][strength.as8[i]]; + } + } + for (i=4;i<8;i++) + { + thresholds.as8[i] = ClipTabQ_Chroma[1][strength.as8[i+4]]; + } + ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR(VQ, imgUV[1]->stride, alphas_chroma[1], betas_chroma[1], thresholds.as8, strength.as8); + + } + +#pragma endregion + + +#pragma region horizontal + MbP = 0; + /* ---- horizontal ---- */ + // edge=0; + if(filterTopMbEdgeFlag) // can not filter beyond picture boundaries + { + MbP = &(p_Vid->mb_data[MbQ->mb_addr_up]); + } + else + { + MbP = 0; + } + + GetStrength_Horiz_YUV420_All(strength.edge, MbQ, mvlimit, p, mb_x*4, mb_y*4, MbP, MbQ->luma_transform_size_8x8_flag); + + { + int i; + + if (MbP) + { + int QP_Chroma0 = (MbP->qpc[0] + MbQ->qpc[0] + 1) >> 1; + int QP_Chroma1 = (MbP->qpc[1] + MbQ->qpc[1] + 1) >> 1; + int QP = (MbP->qp + QPQ + 1) >> 1; + + int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset); + int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset); + const byte *ClipTab = CLIP_TAB[indexA]; + + alphas[0] = ALPHA_TABLE[indexA]; + betas[0] = BETA_TABLE [indexB]; + for (i=0;i<4;i++) + { + thresholds.as8[i] = ClipTab[strength.as8[i]]; + } + + indexA = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFAlphaC0Offset); + 
alphas_chroma[0][0] = ALPHA_TABLE[indexA]; + ClipTabP_Chroma[0] = CLIP_TAB[indexA]; + indexB = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFBetaOffset); + betas_chroma[0][0] =BETA_TABLE[indexB]; + + indexA = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFAlphaC0Offset); + alphas_chroma[1][0] = ALPHA_TABLE[indexA]; + ClipTabP_Chroma[1] = CLIP_TAB[indexA]; + indexB = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFBetaOffset); + betas_chroma[1][0] = BETA_TABLE[indexB]; + } + + for (i=4;i<16;i++) + { + thresholds.as8[i] = ClipTabQ[strength.as8[i]]; + } + + ippiFilterDeblockingLuma_HorEdge_H264_8u_C1IR(YQ, imgY->stride, alphas, betas, thresholds.as8, strength.as8); + + if (MbP) + { + for (i=0;i<4;i++) + { + thresholds.as8[i] = ClipTabP_Chroma[0][strength.as8[i]]; + } + } + for (i=4;i<8;i++) + { + thresholds.as8[i] = ClipTabQ_Chroma[0][strength.as8[i+4]]; + } + ippiFilterDeblockingChroma_HorEdge_H264_8u_C1IR(UQ, imgUV[0]->stride, alphas_chroma[0], betas_chroma[0], thresholds.as8, strength.as8); + + if (MbP) + { + for (i=0;i<4;i++) + { + thresholds.as8[i] = ClipTabP_Chroma[1][strength.as8[i]]; + } + } + for (i=4;i<8;i++) + { + thresholds.as8[i] = ClipTabQ_Chroma[1][strength.as8[i+4]]; + } + ippiFilterDeblockingChroma_HorEdge_H264_8u_C1IR(VQ, imgUV[1]->stride, alphas_chroma[1], betas_chroma[1], thresholds.as8, strength.as8); + + } +#pragma endregion + + p_Vid->DeblockCall = 0; +} + + +#define ANY_INTRA (MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM) + +/*! 
+********************************************************************************************* +* \brief +* returns a buffer of 16 Strength values for one stripe in a mb (for different Frame or Field types) +********************************************************************************************* +*/ +void GetStrengthNormal_Horiz(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p); +void GetStrengthNormal_Vert(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p); + +static void GetStrengthNormal(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p) +{ + if (dir == 0) + GetStrengthNormal_Vert(Strength, MbQ, edge, mvlimit, p); + else + GetStrengthNormal_Horiz(Strength, MbQ, edge, mvlimit, p); +} + +/*! +********************************************************************************************* +* \brief +* returns a buffer of 16 Strength values for one stripe in a mb (for MBAFF) +********************************************************************************************* +*/ +static void GetStrengthMBAff_Horiz(byte Strength[16], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p) +{ + // dir == 1 + short blkP, blkQ, idx; + short blk_x, blk_x2, blk_y, blk_y2 ; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int xQ, yQ; + short mb_x, mb_y; + Macroblock *MbP; + + PixelPos pixP; + int dir_m1 = 0; + + PicMotionParams *motion = &p->motion; + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + yQ = (edge < MB_BLOCK_SIZE ? 
edge : 1); + + for( idx = 0; idx < 16; ++idx ) + { + VideoParameters *p_Vid = MbQ->p_Vid; + xQ = idx; + + p_Vid->getNeighbourLuma(MbQ, xQ , yQ - 1, &pixP); + blkQ = (short) ((yQ & 0xFFFC) + (xQ >> 2)); + blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2)); + + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field); + + if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + Strength[idx] = (edge == 0 && (((!p->mb_aff_frame_flag && (p->structure==FRAME)) || + (p->mb_aff_frame_flag && !MbP->mb_field && !MbQ->mb_field)))) ? 4 : 3; + } + else + { + // Start with Strength=3. or Strength=4 for Mb-edge + Strength[idx] = (edge == 0 && (((!p->mb_aff_frame_flag && (p->structure==FRAME)) || + (p->mb_aff_frame_flag && !MbP->mb_field && !MbQ->mb_field)))) ? 4 : 3; + + if( !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM) + && !(MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM) ) + { + if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) ) + Strength[idx] = 2 ; + else + { + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + if (p_Vid->mixedModeEdgeFlag) + { + (Strength[idx] = 1); + } + else + { + p_Vid->get_mb_block_pos (p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + blk_y = (short) ((mb_y<<2) + (blkQ >> 2)); + blk_x = (short) ((mb_x<<2) + (blkQ & 3)); + blk_y2 = (short) (pixP.pos_y >> 2); + blk_x2 = (short) (pixP.pos_x >> 2); + { + PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + motion_p0=&motion0[blk_y ][blk_x ]; + motion_q0=&motion0[blk_y2][blk_x2]; + motion_p1=&motion1[blk_y ][blk_x ]; + motion_q1=&motion1[blk_y2][blk_x2]; + + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || + ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + Strength[idx]=0; + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0==ref_q0) + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + + Strength[idx] = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + Strength[idx] = 1; + } + } + } + } + } + } + } +} + +static void GetStrengthMBAff_Vert(byte Strength[16], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p) +{ + // dir == 0 + short blkP, blkQ, idx; + short blk_x, blk_x2, blk_y, blk_y2 ; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int xQ, yQ; + short mb_x, mb_y; + Macroblock *MbP; + + PixelPos pixP; + int dir_m1 = 1; + + 
PicMotionParams *motion = &p->motion; + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + xQ = edge; + for( idx = 0; idx < 16; ++idx ) + { + VideoParameters *p_Vid = MbQ->p_Vid; + + yQ = idx; + p_Vid->getNeighbourLuma(MbQ, xQ - 1, yQ, &pixP); + blkQ = (short) ((yQ & 0xFFFC) + (xQ >> 2)); + blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2)); + + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field); + + if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + Strength[idx] = (edge == 0 && (((!p->mb_aff_frame_flag && (p->structure==FRAME)) || + (p->mb_aff_frame_flag && !MbP->mb_field && !MbQ->mb_field)) || + ((p->mb_aff_frame_flag || (p->structure != FRAME))))) ? 4 : 3; + } + else + { + // Start with Strength=3. or Strength=4 for Mb-edge + Strength[idx] = (edge == 0 && (((!p->mb_aff_frame_flag && (p->structure==FRAME)) || + (p->mb_aff_frame_flag && !MbP->mb_field && !MbQ->mb_field)) || + ((p->mb_aff_frame_flag || (p->structure!=FRAME))))) ? 
4 : 3; + + if( !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM) + && !(MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM) ) + { + if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) ) + Strength[idx] = 2 ; + else + { + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + if (p_Vid->mixedModeEdgeFlag) + { + (Strength[idx] = 1); + } + else + { + p_Vid->get_mb_block_pos (p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + blk_y = (short) ((mb_y<<2) + (blkQ >> 2)); + blk_x = (short) ((mb_x<<2) + (blkQ & 3)); + blk_y2 = (short) (pixP.pos_y >> 2); + blk_x2 = (short) (pixP.pos_x >> 2); + { + PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + motion_p0=&motion0[blk_y ][blk_x ]; + motion_q0=&motion0[blk_y2][blk_x2]; + motion_p1=&motion1[blk_y ][blk_x ]; + motion_q1=&motion1[blk_y2][blk_x2]; + + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || + ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + Strength[idx]=0; + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0==ref_q0) + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + + Strength[idx] = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + Strength[idx] = 1; + } + } + } + } + } + } + } +} + +static void GetStrengthMBAff(byte Strength[16], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p) +{ + if (dir == 0) + GetStrengthMBAff_Vert(Strength, MbQ, dir, edge, mvlimit, p); + else + GetStrengthMBAff_Horiz(Strength, MbQ, dir, edge, mvlimit, p); +} + +/*! 
+***************************************************************************************** +* \brief +* Filters 16 pel block edge of Frame or Field coded MBs +***************************************************************************************** +*/ + + +static void EdgeLoopLumaNormal(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, + int dir, int edge, StorablePicture *p) +{ + if (dir == 0) + EdgeLoopLumaNormal_Vert(pl, image, Strength, MbQ, edge, p); + else if (sse2_flag) + EdgeLoopLumaNormal_Horiz_sse2(pl, image, Strength, MbQ, edge, p); + else + EdgeLoopLumaNormal_Horiz(pl, image, Strength, MbQ, edge, p); +} + +/*! +***************************************************************************************** +* \brief +* Filters 16 pel block edge of Super MB Frame coded MBs +***************************************************************************************** +*/ +static void EdgeLoopLumaMBAff_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p) +{ + // dir == 1 + imgpel **Img = image->img; + int width = image->stride; + int pel, ap = 0, aq = 0, Strng ; + int incP, incQ; + int C0, tc0, dif; + imgpel L0, R0; + int Alpha = 0, Beta = 0 ; + const byte* ClipTab = NULL; + int small_gap; + int indexA, indexB; + int PelNum = pl? pelnum_cr[1][p->chroma_format_idc] : MB_BLOCK_SIZE; + + int QP; + int xQ, yQ; + + PixelPos pixP, pixQ; + VideoParameters *p_Vid = MbQ->p_Vid; + int bitdepth_scale = pl? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + byte fieldModeFilteringFlag; + + Macroblock *MbP; + imgpel *SrcPtrP, *SrcPtrQ; + + for( pel = 0 ; pel < PelNum ; ++pel ) + { + xQ = pel ; + yQ = (edge < 16 ? 
edge : 1) ; + p_Vid->getNeighbourLuma(MbQ, xQ, yQ - 1, &pixP); + + if (pixP.available || (MbQ->DFDisableIdc== 0)) + { + if( (Strng = Strength[pel]) != 0) + { + p_Vid->getNeighbourLuma(MbQ, xQ, yQ, &pixQ); + + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + fieldModeFilteringFlag = (byte) (MbQ->mb_field || MbP->mb_field); + + incQ = ((fieldModeFilteringFlag && !MbQ->mb_field) ? 2 * width : width); + incP = ((fieldModeFilteringFlag && !MbP->mb_field) ? 2 * width : width); + SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + + // Average QP of the two blocks + QP = pl? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1; + + indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + Beta = BETA_TABLE [indexB] * bitdepth_scale; + ClipTab = CLIP_TAB[indexA]; + + + L0 = SrcPtrP[0] ; + R0 = SrcPtrQ[0] ; + + + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel L1 = SrcPtrP[-incP]; + imgpel R1 = SrcPtrQ[ incQ]; + if ((abs( R0 - R1) < Beta ) && (abs(L0 - L1) < Beta )) + { + imgpel L2 = SrcPtrP[-incP*2]; + imgpel R2 = SrcPtrQ[ incQ*2]; + if(Strng == 4 ) // INTRA strong filtering + { + int RL0 = L0 + R0; + small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2)); + aq = ( abs( R0 - R2) < Beta ) & small_gap; + ap = ( abs( L0 - L2) < Beta ) & small_gap; + + if (ap) + { + imgpel L3 = SrcPtrP[-incP*3]; + SrcPtrP[-incP * 2] = (imgpel) ((((L3 + L2) << 1) + L2 + L1 + RL0 + 4) >> 3); + SrcPtrP[-incP ] = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2); + SrcPtrP[ 0 ] = (imgpel) (( R1 + ((L1 + RL0) << 1) + L2 + 4) >> 3); + } + else + { + SrcPtrP[ 0 ] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ; + } + + if (aq) + { + imgpel R3 = SrcPtrQ[ incQ*3]; + SrcPtrQ[ 0 ] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3); + SrcPtrQ[ incQ ] = (imgpel) (( R2 + R0 + R1 + L0 + 2) >> 2); + SrcPtrQ[ incQ * 2 ] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + RL0 + 4) >> 3); + } + else + { 
+ SrcPtrQ[ 0 ] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2); + } + } + else // normal filtering + { + int RL0 = (L0 + R0 + 1) >> 1; + aq = (abs( R0 - R2) < Beta); + ap = (abs( L0 - L2) < Beta); + + C0 = ClipTab[ Strng ] * bitdepth_scale; + tc0 = (C0 + ap + aq) ; + dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3) ; + + if( ap ) + *(SrcPtrP - incP) += iClip3( -C0, C0, ( L2 + RL0 - (L1 << 1)) >> 1 ) ; + + *SrcPtrP = (imgpel) iClip1 (max_imgpel_value, L0 + dif) ; + *SrcPtrQ = (imgpel) iClip1 (max_imgpel_value, R0 - dif) ; + + if( aq ) + *(SrcPtrQ + incQ) += iClip3( -C0, C0, ( R2 + RL0 - (R1 << 1)) >> 1 ) ; + } + } + } + } + } + } +} + +static void EdgeLoopLumaMBAff_Vert(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p) +{ + // dir == 0 + imgpel **Img = image->img; + int width = image->stride; + int pel, ap = 0, aq = 0, Strng ; + + int C0, tc0, dif; + imgpel L0, R0; + int Alpha = 0, Beta = 0 ; + const byte* ClipTab = NULL; + int small_gap; + int indexA, indexB; + int PelNum = pl? pelnum_cr[0][p->chroma_format_idc] : MB_BLOCK_SIZE; + + int QP; + int xQ, yQ; + + PixelPos pixP, pixQ; + VideoParameters *p_Vid = MbQ->p_Vid; + int bitdepth_scale = pl? 
p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + byte fieldModeFilteringFlag; + + Macroblock *MbP; + imgpel *SrcPtrP, *SrcPtrQ; + + for( pel = 0 ; pel < PelNum ; ++pel ) + { + xQ = edge; + yQ = pel; + p_Vid->getNeighbourXPLuma(MbQ, xQ - 1, yQ, &pixP); + + if (pixP.available || (MbQ->DFDisableIdc== 0)) + { + if( (Strng = Strength[pel]) != 0) + { + p_Vid->getNeighbourLuma(MbQ, xQ, yQ, &pixQ); + + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + fieldModeFilteringFlag = (byte) (MbQ->mb_field || MbP->mb_field); + + SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + + // Average QP of the two blocks + QP = pl? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1; + + indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + Beta = BETA_TABLE [indexB] * bitdepth_scale; + ClipTab = CLIP_TAB[indexA]; + + + L0 = SrcPtrP[0] ; + R0 = SrcPtrQ[0] ; + + + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel L1 = SrcPtrP[-1]; + imgpel R1 = SrcPtrQ[ 1]; + if ((abs( R0 - R1) < Beta ) && (abs(L0 - L1) < Beta )) + { + imgpel L2 = SrcPtrP[-2]; + imgpel R2 = SrcPtrQ[ 2]; + if(Strng == 4 ) // INTRA strong filtering + { + int RL0 = L0 + R0; + small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2)); + aq = ( abs( R0 - R2) < Beta ) & small_gap; + ap = ( abs( L0 - L2) < Beta ) & small_gap; + + if (ap) + { + imgpel L3 = SrcPtrP[-3]; + SrcPtrP[-2] = (imgpel) ((((L3 + L2) << 1) + L2 + L1 + RL0 + 4) >> 3); + SrcPtrP[-1 ] = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2); + SrcPtrP[ 0 ] = (imgpel) (( R1 + ((L1 + RL0) << 1) + L2 + 4) >> 3); + } + else + { + SrcPtrP[ 0 ] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ; + } + + if (aq) + { + imgpel R3 = SrcPtrQ[ 3]; + SrcPtrQ[ 0 ] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) 
>> 3); + SrcPtrQ[ 1 ] = (imgpel) (( R2 + R0 + R1 + L0 + 2) >> 2); + SrcPtrQ[ 2 ] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + RL0 + 4) >> 3); + } + else + { + SrcPtrQ[ 0 ] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2); + } + } + else // normal filtering + { + int RL0 = (L0 + R0 + 1) >> 1; + aq = (abs( R0 - R2) < Beta); + ap = (abs( L0 - L2) < Beta); + + C0 = ClipTab[ Strng ] * bitdepth_scale; + tc0 = (C0 + ap + aq) ; + dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3) ; + + if( ap ) + *(SrcPtrP - 1) += iClip3( -C0, C0, ( L2 + RL0 - (L1 << 1)) >> 1 ) ; + + *SrcPtrP = (imgpel) iClip1 (max_imgpel_value, L0 + dif) ; + *SrcPtrQ = (imgpel) iClip1 (max_imgpel_value, R0 - dif) ; + + if( aq ) + *(SrcPtrQ + 1) += iClip3( -C0, C0, ( R2 + RL0 - (R1 << 1)) >> 1 ) ; + } + } + } + } + } + } +} + +static void EdgeLoopLumaMBAff(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, StorablePicture *p) +{ + if (dir == 0) + EdgeLoopLumaMBAff_Vert(pl, image, Strength, MbQ, edge, p); + else + EdgeLoopLumaMBAff_Horiz(pl, image, Strength, MbQ, edge, p); +} + +/*! +***************************************************************************************** +* \brief +* Filters chroma block edge for Frame or Field coded pictures +***************************************************************************************** +*/ + + +static void EdgeLoopChromaNormal(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p) +{ + if (dir == 0) + EdgeLoopChromaNormal_Vert(image, Strength, MbQ, edge, uv, p); + else + EdgeLoopChromaNormal_Horiz(image, Strength, MbQ, edge, uv, p); + +} +/*! 
+***************************************************************************************** +* \brief +* Filters chroma block edge for MBAFF types +***************************************************************************************** +*/ +static void EdgeLoopChromaMBAff(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p) +{ + imgpel** Img = image->img; + + int pel, Strng ; + int incP, incQ; + int C0, tc0, dif; + imgpel L0, R0; + int Alpha = 0, Beta = 0; + const byte* ClipTab = NULL; + int indexA, indexB; + VideoParameters *p_Vid = MbQ->p_Vid; + int PelNum = pelnum_cr[dir][p->chroma_format_idc]; + int StrengthIdx; + int QP; + int xQ, yQ; + PixelPos pixP, pixQ; + int dir_m1 = 1 - dir; + int bitdepth_scale = p_Vid->bitdepth_scale[IS_CHROMA]; + int max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1]; + + int AlphaC0Offset = MbQ->DFAlphaC0Offset; + int BetaOffset = MbQ->DFBetaOffset; + byte fieldModeFilteringFlag; + Macroblock *MbP; + imgpel *SrcPtrP, *SrcPtrQ; + int width = image->stride; + + for( pel = 0 ; pel < PelNum ; ++pel ) + { + xQ = dir ? pel : edge; + yQ = dir ? (edge < 16? edge : 1) : pel; + p_Vid->getNeighbour(MbQ, xQ, yQ, p_Vid->mb_size[IS_CHROMA], &pixQ); + p_Vid->getNeighbour(MbQ, xQ - (dir_m1), yQ - dir, p_Vid->mb_size[IS_CHROMA], &pixP); + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + StrengthIdx = (PelNum == 8) ? ((MbQ->mb_field && !MbP->mb_field) ? pel << 1 :((pel >> 1) << 2) + (pel & 0x01)) : pel; + + if (pixP.available || (MbQ->DFDisableIdc == 0)) + { + if( (Strng = Strength[StrengthIdx]) != 0) + { + fieldModeFilteringFlag = (byte) (MbQ->mb_field || MbP->mb_field); + incQ = dir ? ((fieldModeFilteringFlag && !MbQ->mb_field) ? 2 * width : width) : 1; + incP = dir ? ((fieldModeFilteringFlag && !MbP->mb_field) ? 
2 * width : width) : 1; + SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + + // Average QP of the two blocks + QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1; + + indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset); + indexB = iClip3(0, MAX_QP, QP + BetaOffset); + + Alpha = ALPHA_TABLE[indexA] * bitdepth_scale; + Beta = BETA_TABLE [indexB] * bitdepth_scale; + ClipTab = CLIP_TAB[indexA]; + + + L0 = SrcPtrP[0] ; + R0 = SrcPtrQ[0] ; + + + if( abs( R0 - L0 ) < Alpha ) + { + imgpel L1 = SrcPtrP[-incP]; + imgpel R1 = SrcPtrQ[ incQ]; + //if( ((abs( R0 - R1) - Beta ) & (abs(L0 - L1) - Beta )) < 0 ) + if( ((abs( R0 - R1) - Beta < 0) && (abs(L0 - L1) - Beta < 0 )) ) + { + if( Strng == 4 ) // INTRA strong filtering + { + SrcPtrQ[0] = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 ); + SrcPtrP[0] = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 ); + } + else + { + C0 = ClipTab[ Strng ] * bitdepth_scale; + tc0 = (C0 + 1); + dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 ); + + SrcPtrP[0] = (imgpel) iClip1 ( max_imgpel_value, L0 + dif ); + SrcPtrQ[0] = (imgpel) iClip1 ( max_imgpel_value, R0 - dif ); + } + } + } + } + } + } +} diff --git a/Src/h264dec/ldecod/src/macroblock.asm b/Src/h264dec/ldecod/src/macroblock.asm new file mode 100644 index 00000000..0f96d2b5 --- /dev/null +++ b/Src/h264dec/ldecod/src/macroblock.asm @@ -0,0 +1,189 @@ +.686 +.XMM +.model FLAT + + +PUBLIC _inv_level_coefficients +_TEXT SEGMENT +_blocks$ = 8 ; size = 4 +_InvLevelScale$ = 12 ; size = 4 +_qp_per$ = 16 ; size = 4 +_inv_level_coefficients PROC + + mov eax, DWORD PTR _blocks$[esp-4] + mov ecx, DWORD PTR _qp_per$[esp-4] + mov edx, DWORD PTR _InvLevelScale$[esp-4] + push esi + push edi + mov edi, 4 +$LL10@inv_level_: + +; 3870 : { +; 3871 : h264_short_block_row_t *block = blocks[b]; +; 3872 : for (j = 0; j < 4; ++j) +; 3873 : { +; 3874 : if (block[j][0]) block[j][0]= rshift_rnd_sf((block[j][0] * InvLevelScale[j][0]) << qp_per, 4); + + movsx esi, WORD PTR [eax+4-4] 
+ test esi, esi + je SHORT $LN4@inv_level_ + imul esi, DWORD PTR [edx] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4-4], si +$LN4@inv_level_: + +; 3875 : if (block[j][1]) block[j][1]= rshift_rnd_sf((block[j][1] * InvLevelScale[j][1]) << qp_per, 4); + + movsx esi, WORD PTR [eax+4-2] + test esi, esi + je SHORT $LN3@inv_level_ + imul esi, DWORD PTR [edx+4] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4-2], si +$LN3@inv_level_: + +; 3876 : if (block[j][2]) block[j][2]= rshift_rnd_sf((block[j][2] * InvLevelScale[j][2]) << qp_per, 4); + + movsx esi, WORD PTR [eax+4] + test esi, esi + je SHORT $LN2@inv_level_ + imul esi, DWORD PTR [edx+8] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4], si +$LN2@inv_level_: + +; 3877 : if (block[j][3]) block[j][3]= rshift_rnd_sf((block[j][3] * InvLevelScale[j][3]) << qp_per, 4); + + movsx esi, WORD PTR [eax+4+2] + test esi, esi + je SHORT $LN6@inv_level_ + imul esi, DWORD PTR [edx+12] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+2], si +$LN6@inv_level_: + movsx esi, WORD PTR [eax+4+4] + test esi, esi + je SHORT $LN27@inv_level_ + imul esi, DWORD PTR [edx+16] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+4], si +$LN27@inv_level_: + movsx esi, WORD PTR [eax+4+6] + test esi, esi + je SHORT $LN28@inv_level_ + imul esi, DWORD PTR [edx+20] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+6], si +$LN28@inv_level_: + movsx esi, WORD PTR [eax+4+8] + test esi, esi + je SHORT $LN29@inv_level_ + imul esi, DWORD PTR [edx+24] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+8], si +$LN29@inv_level_: + movsx esi, WORD PTR [eax+4+10] + test esi, esi + je SHORT $LN30@inv_level_ + imul esi, DWORD PTR [edx+28] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+10], si +$LN30@inv_level_: + movsx esi, WORD PTR [eax+4+12] + test esi, esi + je SHORT $LN32@inv_level_ + imul esi, DWORD PTR [edx+32] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD 
PTR [eax+4+12], si +$LN32@inv_level_: + movsx esi, WORD PTR [eax+4+14] + test esi, esi + je SHORT $LN33@inv_level_ + imul esi, DWORD PTR [edx+36] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+14], si +$LN33@inv_level_: + movsx esi, WORD PTR [eax+4+16] + test esi, esi + je SHORT $LN34@inv_level_ + imul esi, DWORD PTR [edx+40] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+16], si +$LN34@inv_level_: + movsx esi, WORD PTR [eax+4+18] + test esi, esi + je SHORT $LN35@inv_level_ + imul esi, DWORD PTR [edx+44] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+18], si +$LN35@inv_level_: + movsx esi, WORD PTR [eax+4+20] + test esi, esi + je SHORT $LN37@inv_level_ + imul esi, DWORD PTR [edx+48] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+20], si +$LN37@inv_level_: + movsx esi, WORD PTR [eax+4+22] + test esi, esi + je SHORT $LN38@inv_level_ + imul esi, DWORD PTR [edx+52] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+22], si +$LN38@inv_level_: + movsx esi, WORD PTR [eax+4+24] + test esi, esi + je SHORT $LN39@inv_level_ + imul esi, DWORD PTR [edx+56] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+24], si +$LN39@inv_level_: + movsx esi, WORD PTR [eax+4+26] + test esi, esi + je SHORT $LN9@inv_level_ + imul esi, DWORD PTR [edx+60] + shl esi, cl + add esi, 8 + sar esi, 4 + mov WORD PTR [eax+4+26], si +$LN9@inv_level_: + add eax, 32 ; 00000020H + sub edi, 1 + jne $LL10@inv_level_ + pop edi + pop esi + + ret 0 +_inv_level_coefficients ENDP + +END
\ No newline at end of file diff --git a/Src/h264dec/ldecod/src/macroblock.c b/Src/h264dec/ldecod/src/macroblock.c new file mode 100644 index 00000000..10d083b1 --- /dev/null +++ b/Src/h264dec/ldecod/src/macroblock.c @@ -0,0 +1,6475 @@ + +/*! +*********************************************************************** +* \file macroblock.c +* +* \brief +* Decode a Macroblock +* +* \author +* Main contributors (see contributors.h for copyright, address and affiliation details) +* - Inge Lille-Langøy <inge.lille-langoy@telenor.com> +* - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se> +* - Jani Lainema <jani.lainema@nokia.com> +* - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de> +* - Thomas Wedi <wedi@tnt.uni-hannover.de> +* - Detlev Marpe <marpe@hhi.de> +* - Gabi Blaettermann +* - Ye-Kui Wang <wyk@ieee.org> +* - Lowell Winger <lwinger@lsil.com> +* - Alexis Michael Tourapis <alexismt@ieee.org> +*********************************************************************** +*/ + +#include "contributors.h" + +#include <math.h> + +#include "block.h" +#include "global.h" +#include "mbuffer.h" +#include "elements.h" +#include "errorconcealment.h" +#include "macroblock.h" +#include "fmo.h" +#include "cabac.h" +#include "vlc.h" +#include "image.h" +#include "mb_access.h" +#include "biaridecod.h" +#include "transform8x8.h" +#include "transform.h" +#include "mc_prediction.h" +#include "quant.h" +#include "intra4x4_pred.h" +#include "intra8x8_pred.h" +#include "intra16x16_pred.h" +#include "mv_prediction.h" +#include "optim.h" +#include "mb_prediction.h" +#include <emmintrin.h> +#include <smmintrin.h> + +#if TRACE +#define TRACE_STRING(s) strncpy(currSE.tracestring, s, TRACESTRING_SIZE) +#define TRACE_DECBITS(i) dectracebitcnt(1) +#define TRACE_PRINTF(s) sprintf(type, "%s", s); +#define TRACE_STRING_P(s) strncpy(currSE->tracestring, s, TRACESTRING_SIZE) +#else +#define TRACE_STRING(s) +#define TRACE_DECBITS(i) +#define TRACE_PRINTF(s) +#define TRACE_STRING_P(s) +#endif + +//! 
look up tables for FRExt_chroma support +void dectracebitcnt(int count); + +static void read_motion_info_from_NAL_p_slice (Macroblock *currMB); +static void read_motion_info_from_NAL_b_slice (Macroblock *currMB); +static void read_ipred_modes (Macroblock *currMB); +static void read_CBP_and_coeffs_from_NAL_CABAC (Macroblock *currMB); +static void read_CBP_and_coeffs_from_NAL_CAVLC (Macroblock *currMB); +static void read_IPCM_coeffs_from_NAL (Slice *currSlice, struct datapartition *dP); +static void read_one_macroblock_i_slice (Macroblock *currMB); +static void read_one_macroblock_p_slice (Macroblock *currMB); +static void read_one_macroblock_b_slice (Macroblock *currMB); +static int decode_one_component_i_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +static int decode_one_component_p_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +static int decode_one_component_b_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); +static int decode_one_component_sp_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture); + +static inline void or_bits(int64 *x, int mask, int position) +{ +#ifdef _M_IX86 + __m64 mmx_x = *(__m64 *)x; + __m64 mmx_mask = _mm_cvtsi32_si64(mask); + mmx_mask=_mm_slli_si64(mmx_mask, position); + mmx_x = _mm_or_si64(mmx_x, mmx_mask); + *(__m64 *)x = mmx_x; +#else + *x |= ((int64) mask << position); +#endif +} + +/*! +************************************************************************ +* \brief +* Set context for reference frames +************************************************************************ +*/ +static inline int BType2CtxRef (int btype) +{ + return (btype >= 4); +} + +/*! 
+************************************************************************ +* \brief +* Function for reading the reference picture indices using VLC +************************************************************************ +*/ +static char readRefPictureIdx_VLC(SyntaxElement *currSE, DataPartition *dP, int list) +{ +#if TRACE + char tstring[20]; + sprintf( tstring, "ref_idx_l%d", list); + strncpy(currSE->tracestring, tstring, TRACESTRING_SIZE); +#endif + currSE->value2 = list; + readSyntaxElement_UVLC(currSE, dP); + return (char) currSE->value1; +} + +/*! +************************************************************************ +* \brief +* Function for reading the reference picture indices using FLC +************************************************************************ +*/ +static char readRefPictureIdx_FLC(SyntaxElement *currSE, DataPartition *dP, int list) +{ +#if TRACE + char tstring[20]; + sprintf( tstring, "ref_idx_l%d", list); + strncpy(currSE->tracestring, tstring, TRACESTRING_SIZE); +#endif + //currSE->len = 1; + currSE->value1 = 1 - readSyntaxElement_FLC(dP->bitstream, 1); + + return (char) currSE->value1; +} + +/*! +************************************************************************ +* \brief +* Dummy Function for reading the reference picture indices +************************************************************************ +*/ +static char readRefPictureIdx_Null(SyntaxElement *currSE, DataPartition *dP, int list) +{ + return 0; +} + +/*! 
+************************************************************************ +* \brief +* Function to prepare reference picture indice function pointer +************************************************************************ +*/ +static void prepareListforRefIdx ( Macroblock *currMB, SyntaxElement *currSE, int num_ref_idx_active, int refidx_present) +{ + currMB->readRefPictureIdx = readRefPictureIdx_Null; // Initialize readRefPictureIdx + if(num_ref_idx_active > 1) + { + currSE->mapping = linfo_ue; + if (refidx_present) + { + if (num_ref_idx_active == 2) + currMB->readRefPictureIdx = readRefPictureIdx_FLC; + else + currMB->readRefPictureIdx = readRefPictureIdx_VLC; + } + } +} + +#if defined(_DEBUG) || defined(_M_X64) +void set_chroma_qp(Macroblock* currMB) +{ + // TODO: benski> we could use MMX for this if we could find a formula for QP_SCALE_CR + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + int i; + for (i=0; i<2; ++i) + { + currMB->qpc[i] = iClip3 ( -p_Vid->bitdepth_chroma_qp_scale, 51, currMB->qp + dec_picture->chroma_qp_offset[i] ); + currMB->qpc[i] = currMB->qpc[i] < 0 ? currMB->qpc[i] : QP_SCALE_CR[currMB->qpc[i]]; + currMB->qp_scaled[i + 1] = currMB->qpc[i] + p_Vid->bitdepth_chroma_qp_scale; + } +} +#else +void set_chroma_qp(Macroblock* currMB); +#endif + +/*! 
+************************************************************************ +* \brief +* updates chroma QP according to luma QP and bit depth +************************************************************************ +*/ +static inline void update_qp(Macroblock *currMB, int qp) +{ + VideoParameters *p_Vid = currMB->p_Vid; + currMB->qp = qp; + currMB->qp_scaled[0] = qp + p_Vid->bitdepth_luma_qp_scale; + set_chroma_qp(currMB); + currMB->is_lossless = (Boolean) ((currMB->qp_scaled[0] == 0) && (p_Vid->lossless_qpprime_flag == 1)); +} + +static void read_delta_quant_CAVLC(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, const byte *partMap, int type) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + dP = &(currSlice->partArr[partMap[type]]); + + + currSE->mapping = linfo_se; + readSyntaxElement_UVLC(currSE, dP); + currMB->delta_quant = (short) currSE->value1; + + + if ((currMB->delta_quant < -(26 + p_Vid->bitdepth_luma_qp_scale/2)) || (currMB->delta_quant > (25 + p_Vid->bitdepth_luma_qp_scale/2))) + error ("mb_qp_delta is out of range", 500); + + p_Vid->qp = ((p_Vid->qp + currMB->delta_quant + 52 + 2*p_Vid->bitdepth_luma_qp_scale)%(52+p_Vid->bitdepth_luma_qp_scale)) - + p_Vid->bitdepth_luma_qp_scale; + update_qp(currMB, p_Vid->qp); +} + +static void inline read_delta_quant_CABAC(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, const byte *partMap, int type) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + dP = &(currSlice->partArr[partMap[type]]); + + currMB->delta_quant = readDquant_CABAC(currSlice, &dP->de_cabac); + + if ((currMB->delta_quant < -(26 + p_Vid->bitdepth_luma_qp_scale/2)) || (currMB->delta_quant > (25 + p_Vid->bitdepth_luma_qp_scale/2))) + error ("mb_qp_delta is out of range", 500); + + p_Vid->qp = ((p_Vid->qp + currMB->delta_quant + 52 + 2*p_Vid->bitdepth_luma_qp_scale)%(52+p_Vid->bitdepth_luma_qp_scale)) - p_Vid->bitdepth_luma_qp_scale; + update_qp(currMB, 
p_Vid->qp); +} + +/*! +************************************************************************ +* \brief +* Function to read reference picture indice values +************************************************************************ +*/ +static void readMBRefPictureIdx(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0, int step_h0) +{ + int k, j, j0, i0, i; + char refframe; + + for (j0 = 0; j0 < 4; j0 += step_v0) + { + currMB->subblock_y = j0 << 2; + for (i0 = 0; i0 < 4; i0 += step_h0) + { + currMB->subblock_x = i0 << 2; + k = 2 * (j0 >> 1) + (i0 >> 1); + + if ((currMB->b8pdir[k] == list || currMB->b8pdir[k] == BI_PRED) && currMB->b8mode[k] != 0) + { + refframe = currMB->readRefPictureIdx(currSE, dP, list); + + for (j = j0; j < j0 + step_v0; ++j) + { + for (i=0;i<step_h0;i++) + { + motion[j][currMB->block_x + i0 + i].ref_idx = refframe; + } + } + } + } + } +} + +static void readMBRefPictureIdx_CABAC1(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0) +{ + int k, j, j0, i0; + char refframe; + + for (j0 = 0; j0 < 4; j0 += step_v0) + { + currMB->subblock_y = j0 << 2; + for (i0 = 0; i0 < 4; i0 += 1) + { + currMB->subblock_x = i0 << 2; + k = 2 * (j0 >> 1) + (i0 >> 1); + + if ((currMB->b8pdir[k] == list || currMB->b8pdir[k] == BI_PRED) && currMB->b8mode[k] != 0) + { + refframe = readRefFrame_CABAC(currMB, &dP->de_cabac, list, i0<<2, j0<<2); + + for (j = j0; j < j0 + step_v0; ++j) + motion[j][currMB->block_x + i0].ref_idx=refframe; + } + } + } +} + +static void readMBRefPictureIdx_CABAC2(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0) +{ + int k, j, j0; + char refframe; + + for (j0 = 0; j0 < 4; j0 += step_v0) + { + currMB->subblock_y = j0 << 2; + + currMB->subblock_x = 0 << 2; + k = 2 * (j0 >> 1) + (0 >> 1); + + if ((currMB->b8pdir[k] == list || currMB->b8pdir[k] == BI_PRED) && currMB->b8mode[k] != 0) + { + refframe = readRefFrame_CABAC0(currMB, 
&dP->de_cabac, list, j0<<2); + + for (j = j0; j < j0 + step_v0; ++j) + { + motion[j][currMB->block_x + 0].ref_idx=refframe; + motion[j][currMB->block_x + 1].ref_idx=refframe; + } + } + + // + + currMB->subblock_x = 2 << 2; + k = 2 * (j0 >> 1) + (2 >> 1); + + if ((currMB->b8pdir[k] == list || currMB->b8pdir[k] == BI_PRED) && currMB->b8mode[k] != 0) + { + refframe = readRefFrame_CABAC(currMB, &dP->de_cabac, list, 8, j0<<2); + + for (j = j0; j < j0 + step_v0; ++j) + { + motion[j][currMB->block_x + 2].ref_idx=refframe; + motion[j][currMB->block_x + 3].ref_idx=refframe; + } + } + + } +} + + +static void readMBRefPictureIdx_CABAC4(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0) +{ + int k, j, j0; + char refframe; + + for (j0 = 0; j0 < 4; j0 += step_v0) + { + currMB->subblock_y = j0 << 2; + currMB->subblock_x = 0; + k = j0 & ~1; + + if ((currMB->b8pdir[k] == list || currMB->b8pdir[k] == BI_PRED) && currMB->b8mode[k] != 0) + { + refframe = readRefFrame_CABAC0(currMB, &dP->de_cabac, list, j0<<2); + for (j = j0; j < j0 + step_v0; ++j) + { + motion[j][currMB->block_x + 0].ref_idx=refframe; + motion[j][currMB->block_x + 1].ref_idx=refframe; + motion[j][currMB->block_x + 2].ref_idx=refframe; + motion[j][currMB->block_x + 3].ref_idx=refframe; + } + } + } +} + +static void readMBRefPictureIdx_CABAC(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0, int step_h0) +{ + switch(step_h0) + { + case 1: + readMBRefPictureIdx_CABAC1(dP, currMB, motion, list, step_v0); + break; + case 2: + readMBRefPictureIdx_CABAC2(dP, currMB, motion, list, step_v0); + break; + case 4: + readMBRefPictureIdx_CABAC4(dP, currMB, motion, list, step_v0); + break; + } +} + +static void readMBRefPictureIdx_CABAC_NoReference(Macroblock *currMB, PicMotion **motion, int list, int step_v0, int step_h0) +{ + int k, j, j0, i0, i; + + for (j0 = 0; j0 < 4; j0 += step_v0) + { + for (i0 = 0; i0 < 4; i0 += step_h0) + { + k = 2 * (j0 >> 1) + (i0 >> 1); + + if 
((currMB->b8pdir[k] == list || currMB->b8pdir[k] == BI_PRED) && currMB->b8mode[k] != 0) + { + for (j = j0; j < j0 + step_v0; ++j) + { + for (i=0;i<step_h0;i++) + { + motion[j][currMB->block_x + i0 + i].ref_idx=0; + } + } + } + } + } +} + +/*! +************************************************************************ +* \brief +* Function to read reference picture indice values +************************************************************************ +*/ +static void readMBMotionVectors(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, int list, int step_h0, int step_v0) +{ + int i, j, k, i4, j4, ii, jj, kk, i0, j0; + short curr_mvd[2], curr_mv[2], pred_mv[2]; + MotionVector (*mvd)[4]; + //MotionVector **mv; + int mv_mode, step_h, step_v; + char cur_ref_idx; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + PicMotionParams *motion = &dec_picture->motion; + PixelPos block[4]; // neighbor blocks + + + for (j0=0; j0<4; j0+=step_v0) + { + for (i0=0; i0<4; i0+=step_h0) + { + kk = 2 * (j0 >> 1) + (i0 >> 1); + if ((currMB->b8pdir[kk]== list || currMB->b8pdir[kk]== BI_PRED) && (currMB->b8mode[kk] !=0))//has forward vector + { + PicMotion **list_motion = motion->motion[list]; + cur_ref_idx = list_motion[currMB->block_y+j0][currMB->block_x+i0].ref_idx; + mv_mode = currMB->b8mode[kk]; + step_h = BLOCK_STEP [mv_mode][0]; + step_v = BLOCK_STEP [mv_mode][1]; + + for (j = j0; j < j0 + step_v0; j += step_v) + { + PicMotion **mv; + currMB->subblock_y = j << 2; // position used for context determination + j4 = currMB->block_y + j; + mv = &list_motion[j4]; + mvd = &currMB->mvd [list][j]; + for (i = i0; i < i0 + step_h0; i += step_h) + { + currMB->subblock_x = i << 2; // position used for context determination + i4 = currMB->block_x + i; + + get_neighbors(currMB, block, BLOCK_SIZE * i, BLOCK_SIZE * j, 4 * step_h); + + // first make mv-prediction + currMB->GetMVPredictor (currMB, block, pred_mv, cur_ref_idx, list_motion, 
BLOCK_SIZE * i, BLOCK_SIZE * j, 4 * step_h, 4 * step_v); + + for (k=0; k < 2; ++k) + { + currSE->value2 = (k << 1) + list; // identifies the component; only used for context determination + readSyntaxElement_UVLC(currSE, dP); + curr_mvd[k] = (short) currSE->value1; + curr_mv [k] = (short)(curr_mvd[k] + pred_mv[k]); // compute motion vector + } + + // Init motion vectors + for(jj = 0; jj < step_v; ++jj) + { + for(ii = i4; ii < i4 + step_h; ++ii) + { + memcpy(&mv[jj][ii].mv, curr_mv, sizeof(MotionVector)); + } + } + + // Init first line (mvd) + for(ii = i; ii < i + step_h; ++ii) + { + memcpy(mvd[0][ii], curr_mvd, sizeof(MotionVector)); + } + + // now copy all other lines + for(jj = 1; jj < step_v; ++jj) + { + memcpy(mvd[jj][i], mvd[0][i], step_h * sizeof(MotionVector)); + } + } + } + } + } + } +} + +static void readMBMotionVectors_CABAC(DataPartition *dP, Macroblock *currMB, int list, int step_h0, int step_v0) +{ + int i, j, k, i4, j4, ii, jj, kk, i0, j0; + short curr_mvd[2], curr_mv[2], pred_mv[2]; + MotionVector (*mvd)[4]; + //MotionVector **mv; + int mv_mode, step_h, step_v; + char cur_ref_idx; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + PicMotionParams *motion = &dec_picture->motion; + PixelPos block[4]; // neighbor blocks + + for (j0=0; j0<4; j0+=step_v0) + { + for (i0=0; i0<4; i0+=step_h0) + { + kk = (j0 & ~1) + (i0 >> 1); + if ((currMB->b8pdir[kk]== list || currMB->b8pdir[kk]== BI_PRED) && (currMB->b8mode[kk] !=0))//has forward vector + { + PicMotion **list_motion = motion->motion[list]; + cur_ref_idx = list_motion[currMB->block_y+j0][currMB->block_x+i0].ref_idx; + mv_mode = currMB->b8mode[kk]; + step_h = BLOCK_STEP [mv_mode][0]; + step_v = BLOCK_STEP [mv_mode][1]; + + for (j = j0; j < j0 + step_v0; j += step_v) + { + PicMotion **mv; + int block_j = j << 2; + currMB->subblock_y = block_j; // position used for context determination + j4 = currMB->block_y + j; + mv = &list_motion[j4]; + mvd = &currMB->mvd 
[list][j]; + for (i = i0; i < i0 + step_h0; i += step_h) + { + int block_i=i << 2; + currMB->subblock_x = block_i; // position used for context determination + i4 = currMB->block_x + i; + + get_neighbors(currMB, block, block_i, block_j, 4 * step_h); + + // first make mv-prediction + currMB->GetMVPredictor (currMB, block, pred_mv, cur_ref_idx, list_motion, block_i, block_j, 4 * step_h, 4 * step_v); + + for (k=0; k < 2; ++k) + { + //currSE.value2 = (k << 1) + list; // identifies the component; only used for context determination + curr_mvd[k] = (short)readMVD_CABAC(currMB, &dP->de_cabac, k, list, block_i, block_j); + curr_mv [k] = (short)(curr_mvd[k] + pred_mv[k]); // compute motion vector + } + + // Init motion vectors + for(jj = 0; jj < step_v; ++jj) + { + for(ii = i4; ii < i4 + step_h; ++ii) + { + *(int32_t *)(&mv[jj][ii].mv) = *(int32_t *)curr_mv; + } + } + + // Init first line (mvd) + for(ii = i; ii < i + step_h; ++ii) + { + *(int32_t *)(mvd[0][ii]) = *(int32_t *)curr_mvd; + } + + // now copy all other lines + for(jj = 1; jj < step_v; ++jj) + { + memcpy_amd(mvd[jj][i], mvd[0][i], step_h * sizeof(MotionVector)); + } + } + } + } + } + } +} + +/*! 
+************************************************************************ +* \brief +* initializes the current macroblock +************************************************************************ +*/ +void start_macroblock(Slice *currSlice, Macroblock **currMB) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + int mb_nr = p_Vid->current_mb_nr; + Macroblock *mb = &p_Vid->mb_data[mb_nr]; // intialization code deleted, see below, StW + *currMB = mb; + + mb->p_Vid = p_Vid; + mb->p_Slice = currSlice; + mb->mbAddrX = mb_nr; + + //assert (mb_nr < (int) p_Vid->PicSizeInMbs); + + /* Update coordinates of the current macroblock */ + if (currSlice->mb_aff_frame_flag) + { + mb->mb_x = (mb_nr) % ((2*p_Vid->width) / MB_BLOCK_SIZE); + mb->mb_y = 2*((mb_nr) / ((2*p_Vid->width) / MB_BLOCK_SIZE)); + + mb->mb_y += (mb->mb_x & 0x01); + mb->mb_x >>= 1; + } + else + { + mb->mb_x = p_Vid->PicPos[mb_nr][0]; + mb->mb_y = p_Vid->PicPos[mb_nr][1]; + } + + /* Define vertical positions */ + mb->block_y = mb->mb_y * BLOCK_SIZE; /* luma block position */ + mb->block_y_aff = mb->block_y; + mb->pix_y = mb->mb_y * MB_BLOCK_SIZE; /* luma macroblock position */ + mb->pix_c_y = mb->mb_y * p_Vid->mb_cr_size_y; /* chroma macroblock position */ + + /* Define horizontal positions */ + mb->block_x = mb->mb_x * BLOCK_SIZE; /* luma block position */ + mb->pix_x = mb->mb_x * MB_BLOCK_SIZE; /* luma pixel position */ + mb->pix_c_x = mb->mb_x * p_Vid->mb_cr_size_x; /* chroma pixel position */ + + // Save the slice number of this macroblock. When the macroblock below + // is coded it will use this to decide if prediction for above is possible + mb->slice_nr = (short) p_Vid->current_slice_nr; + + if (p_Vid->current_slice_nr >= MAX_NUM_SLICES) + { + error ("Maximum number of supported slices exceeded. 
\nPlease recompile with increased value for MAX_NUM_SLICES", 200); + } + + dec_picture->slice_id[mb->mb_y][mb->mb_x] = (short) p_Vid->current_slice_nr; + dec_picture->max_slice_id = (short) imax(p_Vid->current_slice_nr, dec_picture->max_slice_id); + + CheckAvailabilityOfNeighbors(mb); + + // Select appropriate MV predictor function + init_motion_vector_prediction(*currMB, currSlice->mb_aff_frame_flag); + + set_read_and_store_CBP(currMB, currSlice->active_sps->chroma_format_idc); + + // Reset syntax element entries in MB struct + update_qp(*currMB, p_Vid->qp); + mb->mb_type = 0; + mb->delta_quant = 0; + mb->cbp = 0; + mb->c_ipred_mode = DC_PRED_8; //GB + + if (currSlice->slice_type != I_SLICE) + { + if (currSlice->slice_type != B_SLICE) + memzero64(mb->mvd);//, BLOCK_MULTIPLE * BLOCK_MULTIPLE * 2 * sizeof(short)); + else + memzero128(mb->mvd);//, 2 * BLOCK_MULTIPLE * BLOCK_MULTIPLE * 2 * sizeof(short)); + } + + memzero24(mb->cbp_blk); + memzero24(mb->cbp_bits); + memzero24(mb->cbp_bits_8x8); + + // initialize currSlice->mb_rres + memset(currSlice->mb_rres8, 0, sizeof(currSlice->mb_rres8)); + + // store filtering parameters for this MB + mb->DFDisableIdc = currSlice->DFDisableIdc; + mb->DFAlphaC0Offset = currSlice->DFAlphaC0Offset; + mb->DFBetaOffset = currSlice->DFBetaOffset; + +} + +/*! +************************************************************************ +* \brief +* set coordinates of the next macroblock +* check end_of_slice condition +************************************************************************ +*/ +Boolean exit_macroblock(Slice *currSlice, int eos_bit) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + + //! The if() statement below resembles the original code, which tested + //! p_Vid->current_mb_nr == p_Vid->PicSizeInMbs. Both is, of course, nonsense + //! In an error prone environment, one can only be sure to have a new + //! picture by checking the tr of the next slice header! 
+ + // printf ("exit_macroblock: FmoGetLastMBOfPicture %d, p_Vid->current_mb_nr %d\n", FmoGetLastMBOfPicture(), p_Vid->current_mb_nr); + ++(p_Vid->num_dec_mb); + + if (p_Vid->num_dec_mb == p_Vid->PicSizeInMbs) + { + return TRUE; + } + // ask for last mb in the slice CAVLC + else + { + + p_Vid->current_mb_nr = FmoGetNextMBNr (p_Vid, p_Vid->current_mb_nr); + + if (p_Vid->current_mb_nr == -1) // End of Slice group, MUST be end of slice + { + assert (currSlice->nal_startcode_follows (currSlice, eos_bit) == TRUE); + return TRUE; + } + + if(currSlice->nal_startcode_follows(currSlice, eos_bit) == FALSE) + return FALSE; + + if(currSlice->slice_type == I_SLICE || currSlice->slice_type == SI_SLICE || p_Vid->active_pps->entropy_coding_mode_flag == CABAC) + return TRUE; + if(p_Vid->cod_counter <= 0) + return TRUE; + return FALSE; + } +} + +/*! +************************************************************************ +* \brief +* Interpret the mb mode for P-Frames +************************************************************************ +*/ +static void interpret_mb_mode_P(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + static const int ICBPTAB[6] = {0,16,32,15,31,47}; + int mbmode = currMB->mb_type; + +#define ZERO_P8x8 (mbmode==5) +#define MODE_IS_P8x8 (mbmode==4 || mbmode==5) +#define MODE_IS_I4x4 (mbmode==6) +#define I16OFFSET (mbmode-7) +#define MODE_IS_IPCM (mbmode==31) + + if(mbmode <4) + { + currMB->mb_type = mbmode; + memset(&currMB->b8mode[0],mbmode,4 * sizeof(char)); + memset(&currMB->b8pdir[0], 0, 4 * sizeof(char)); + } + else if(MODE_IS_P8x8) + { + currMB->mb_type = P8x8; + p_Vid->allrefzero = ZERO_P8x8; + } + else if(MODE_IS_I4x4) + { + currMB->mb_type = I4MB; + memset(&currMB->b8mode[0],IBLOCK, 4 * sizeof(char)); + memset(&currMB->b8pdir[0], -1, 4 * sizeof(char)); + } + else if(MODE_IS_IPCM) + { + currMB->mb_type = IPCM; + currMB->cbp = -1; + currMB->i16mode = 0; + + memset(&currMB->b8mode[0], 0, 4 * sizeof(char)); + 
memset(&currMB->b8pdir[0],-1, 4 * sizeof(char)); + } + else + { + currMB->mb_type = I16MB; + currMB->cbp = ICBPTAB[(I16OFFSET)>>2]; + currMB->i16mode = (I16OFFSET) & 0x03; + memset(&currMB->b8mode[0], 0, 4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1, 4 * sizeof(char)); + } +} + +/*! +************************************************************************ +* \brief +* Interpret the mb mode for I-Frames +************************************************************************ +*/ +static void interpret_mb_mode_I(Macroblock *currMB) +{ + static const int ICBPTAB[6] = {0,16,32,15,31,47}; + int mbmode = currMB->mb_type; + + if (mbmode==0) + { + currMB->mb_type = I4MB; + memset(&currMB->b8mode[0],IBLOCK,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + } + else if(mbmode==25) + { + currMB->mb_type=IPCM; + currMB->cbp= -1; + currMB->i16mode = 0; + + memset(&currMB->b8mode[0],0,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + } + else + { + currMB->mb_type = I16MB; + currMB->cbp= ICBPTAB[(mbmode-1)>>2]; + currMB->i16mode = (mbmode-1) & 0x03; + memset(&currMB->b8mode[0], 0, 4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1, 4 * sizeof(char)); + } +} + +/*! 
+************************************************************************ +* \brief +* Interpret the mb mode for B-Frames +************************************************************************ +*/ +static void interpret_mb_mode_B(Macroblock *currMB) +{ + static const int offset2pdir16x16[12] = {0, 0, 1, 2, 0,0,0,0,0,0,0,0}; + static const int offset2pdir16x8[22][2] = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,1},{0,0},{0,1},{0,0},{1,0}, + {0,0},{0,2},{0,0},{1,2},{0,0},{2,0},{0,0},{2,1},{0,0},{2,2},{0,0}}; + static const int offset2pdir8x16[22][2] = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,1},{0,0},{0,1},{0,0}, + {1,0},{0,0},{0,2},{0,0},{1,2},{0,0},{2,0},{0,0},{2,1},{0,0},{2,2}}; + + static const int ICBPTAB[6] = {0,16,32,15,31,47}; + + int i, mbmode; + int mbtype = currMB->mb_type; + + //--- set mbtype, b8type, and b8pdir --- + if (mbtype==0) // direct + { + mbmode=0; + memset(&currMB->b8mode[0],0,4 * sizeof(char)); + memset(&currMB->b8pdir[0],2,4 * sizeof(char)); + } + else if (mbtype==23) // intra4x4 + { + mbmode=I4MB; + memset(&currMB->b8mode[0],IBLOCK,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + } + else if ((mbtype>23) && (mbtype<48) ) // intra16x16 + { + mbmode=I16MB; + memset(&currMB->b8mode[0],0,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + + currMB->cbp = ICBPTAB[(mbtype-24)>>2]; + currMB->i16mode = (mbtype-24) & 0x03; + } + else if (mbtype==22) // 8x8(+split) + { + mbmode=P8x8; // b8mode and pdir is transmitted in additional codewords + } + else if (mbtype<4) // 16x16 + { + mbmode=1; + memset(&currMB->b8mode[0], 1,4 * sizeof(char)); + memset(&currMB->b8pdir[0],offset2pdir16x16[mbtype],4 * sizeof(char)); + } + else if(mbtype==48) + { + mbmode=IPCM; + memset(&currMB->b8mode[0], 0,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + + currMB->cbp= -1; + currMB->i16mode = 0; + } + + else if ((mbtype&0x01)==0) // 16x8 + { + mbmode=2; + memset(&currMB->b8mode[0], 2,4 * sizeof(char)); + 
for(i=0;i<4;++i) + { + currMB->b8pdir[i] = (char) offset2pdir16x8 [mbtype][i>>1]; + } + } + else + { + mbmode=3; + memset(&currMB->b8mode[0], 3,4 * sizeof(char)); + for(i=0;i<4; ++i) + { + currMB->b8pdir[i] = (char) offset2pdir8x16 [mbtype][i&0x01]; + } + } + currMB->mb_type = mbmode; +} +/*! +************************************************************************ +* \brief +* Interpret the mb mode for SI-Frames +************************************************************************ +*/ +static void interpret_mb_mode_SI(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + const int ICBPTAB[6] = {0,16,32,15,31,47}; + int mbmode = currMB->mb_type; + + if (mbmode==0) + { + currMB->mb_type = SI4MB; + memset(&currMB->b8mode[0],IBLOCK,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + p_Vid->siblock[currMB->mb_y][currMB->mb_x]=1; + } + else if (mbmode==1) + { + currMB->mb_type = I4MB; + memset(&currMB->b8mode[0],IBLOCK,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + } + else if(mbmode==26) + { + currMB->mb_type=IPCM; + currMB->cbp= -1; + currMB->i16mode = 0; + memset(&currMB->b8mode[0],0,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + } + + else + { + currMB->mb_type = I16MB; + currMB->cbp= ICBPTAB[(mbmode-2)>>2]; + currMB->i16mode = (mbmode-2) & 0x03; + memset(&currMB->b8mode[0],0,4 * sizeof(char)); + memset(&currMB->b8pdir[0],-1,4 * sizeof(char)); + } +} + +/*! 
+************************************************************************ +* \brief +* Set mode interpretation based on slice type +************************************************************************ +*/ +void setup_slice_methods(Slice *currSlice) +{ + switch (currSlice->slice_type) + { + case P_SLICE: + currSlice->interpret_mb_mode = interpret_mb_mode_P; + currSlice->read_motion_info_from_NAL = read_motion_info_from_NAL_p_slice; + currSlice->read_one_macroblock = read_one_macroblock_p_slice; + currSlice->decode_one_component = decode_one_component_p_slice; + break; + case SP_SLICE: + currSlice->interpret_mb_mode = interpret_mb_mode_P; + currSlice->read_motion_info_from_NAL = read_motion_info_from_NAL_p_slice; + currSlice->read_one_macroblock = read_one_macroblock_p_slice; + currSlice->decode_one_component = decode_one_component_sp_slice; + break; + case B_SLICE: + currSlice->interpret_mb_mode = interpret_mb_mode_B; + currSlice->read_motion_info_from_NAL = read_motion_info_from_NAL_b_slice; + currSlice->read_one_macroblock = read_one_macroblock_b_slice; + currSlice->decode_one_component = decode_one_component_b_slice; + break; + case I_SLICE: + currSlice->interpret_mb_mode = interpret_mb_mode_I; + currSlice->read_motion_info_from_NAL = NULL; + currSlice->read_one_macroblock = read_one_macroblock_i_slice; + currSlice->decode_one_component = decode_one_component_i_slice; + break; + case SI_SLICE: + currSlice->interpret_mb_mode = interpret_mb_mode_SI; + currSlice->read_motion_info_from_NAL = NULL; + currSlice->read_one_macroblock = read_one_macroblock_i_slice; + currSlice->decode_one_component = decode_one_component_i_slice; + break; + default: + printf("Unsupported slice type\n"); + break; + } + + if( IS_INDEPENDENT(currSlice->p_Vid) ) + currSlice->compute_colocated = compute_colocated_JV; + else + { + if (currSlice->active_sps->frame_mbs_only_flag) + currSlice->compute_colocated = compute_colocated; + else + currSlice->compute_colocated = 
compute_colocated_frames_mbs; + } + + switch(currSlice->p_Vid->active_pps->entropy_coding_mode_flag) + { + case CABAC: + currSlice->read_CBP_and_coeffs_from_NAL = read_CBP_and_coeffs_from_NAL_CABAC; + break; + case CAVLC: + currSlice->read_CBP_and_coeffs_from_NAL = read_CBP_and_coeffs_from_NAL_CAVLC; + break; + default: + printf("Unsupported entropy coding mode\n"); + break; + } + +} + +void macroblock_set_dc_pred(VideoParameters *p_Vid, int block_x, int block_y) +{ + int32_t dc_pred = 2 + (2 << 8) + (2 << 16) + (2 << 24); + int32_t *pred = (int32_t *)&p_Vid->ipredmode[block_y][block_x]; + int stride = p_Vid->PicWidthInMbs; + int i; + for (i=0;i<BLOCK_SIZE;i++) + { + *pred = dc_pred; + pred += stride; + } +} +/*! +************************************************************************ +* \brief +* init macroblock I and P frames +************************************************************************ +*/ +#ifdef _M_IX86 +static void init_macroblock(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int j; + int block_x = currMB->block_x, block_y = currMB->block_y; + PicMotionParams *motion = &p_Vid->dec_picture->motion; + PicMotion **list_motion0, **list_motion1; + __m64 const_0_minus_1 = _mm_setr_pi32(0, -1); + macroblock_set_dc_pred(p_Vid, block_x, block_y); + + // reset vectors and pred. 
modes + list_motion0 = motion->motion[LIST_0]; + for(j = 0; j < BLOCK_SIZE; j++) + { + PicMotion *block = &list_motion0[block_y+j][block_x]; + block[0].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[0].mv = const_0_minus_1; + + block[1].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[1].mv = const_0_minus_1; + + block[2].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[2].mv = const_0_minus_1; + + + block[3].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[3].mv = const_0_minus_1; + } + + list_motion1 = motion->motion[LIST_1]; + for(j = 0; j < BLOCK_SIZE; j++) + { + PicMotion *block = &list_motion1[block_y+j][block_x]; + + block[0].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[0].mv = const_0_minus_1; + + block[1].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[1].mv = const_0_minus_1; + + block[2].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[2].mv = const_0_minus_1; + + + block[3].ref_pic_id = UNDEFINED_REFERENCE; + *(__m64 *)&block[3].mv = const_0_minus_1; + } + +} + + +#else +static void init_macroblock(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int i, j; + int block_x = currMB->block_x, block_y = currMB->block_y; + PicMotionParams *motion = &p_Vid->dec_picture->motion; + PicMotion **list_motion0, **list_motion1; + macroblock_set_dc_pred(p_Vid, block_x, block_y); + + // reset vectors and pred. 
modes + list_motion0 = motion->motion[LIST_0]; + for(j = 0; j < BLOCK_SIZE; j++) + { + PicMotion *block0 = &list_motion0[block_y+j][block_x]; + block0[0].ref_pic_id = UNDEFINED_REFERENCE; + memset(block0[0].mv, 0, sizeof(MotionVector)); + block0[0].ref_idx = -1; + + block0[1].ref_pic_id = UNDEFINED_REFERENCE; + memset(block0[1].mv, 0, sizeof(MotionVector)); + block0[1].ref_idx = -1; + + block0[2].ref_pic_id = UNDEFINED_REFERENCE; + memset(block0[2].mv, 0, sizeof(MotionVector)); + block0[2].ref_idx = -1; + + block0[3].ref_pic_id = UNDEFINED_REFERENCE; + memset(block0[3].mv, 0, sizeof(MotionVector)); + block0[3].ref_idx = -1; + + + } + + list_motion1 = motion->motion[LIST_1]; + for(j = 0; j < BLOCK_SIZE; j++) + { + PicMotion *block1 = &list_motion1[block_y+j][block_x]; + + block1[0].ref_pic_id = UNDEFINED_REFERENCE; + memset(block1[0].mv, 0, sizeof(MotionVector)); + block1[0].ref_idx = -1; + + block1[1].ref_pic_id = UNDEFINED_REFERENCE; + memset(block1[1].mv, 0, sizeof(MotionVector)); + block1[1].ref_idx = -1; + + block1[2].ref_pic_id = UNDEFINED_REFERENCE; + memset(block1[2].mv, 0, sizeof(MotionVector)); + block1[2].ref_idx = -1; + + block1[3].ref_pic_id = UNDEFINED_REFERENCE; + memset(block1[3].mv, 0, sizeof(MotionVector)); + block1[3].ref_idx = -1; + } + +} + + +#endif +/*! 
+************************************************************************ +* \brief +* Sets mode for 8x8 block +************************************************************************ +*/ +void SetB8Mode (Macroblock* currMB, int value, int i) +{ + Slice* currSlice = currMB->p_Slice; + static const char p_v2b8 [ 5] = {4, 5, 6, 7, IBLOCK}; + static const char p_v2pd [ 5] = {0, 0, 0, 0, -1}; + static const char b_v2b8 [14] = {0, 4, 4, 4, 5, 6, 5, 6, 5, 6, 7, 7, 7, IBLOCK}; + static const char b_v2pd [14] = {2, 0, 1, 2, 0, 0, 1, 1, 2, 2, 0, 1, 2, -1}; + + if (currSlice->slice_type==B_SLICE) + { + currMB->b8mode[i] = b_v2b8[value]; + currMB->b8pdir[i] = b_v2pd[value]; + } + else + { + currMB->b8mode[i] = p_v2b8[value]; + currMB->b8pdir[i] = p_v2pd[value]; + } +} + + +void reset_coeffs(Slice *currSlice) +{ + + VideoParameters *p_Vid = currSlice->p_Vid; + + // reset all coeffs +#ifdef _DEBUG + { + int m; + for (m=0;m<3;m++) + { + int z; + short *b = &currSlice->cof[m][0][0]; + for (z=0;z<256;z++) + { + if (b[z] != 0) + { + DebugBreak(); + } + } + } + } +#endif + + // benski> don't think this is necessary... enable check above to be sure + // memset(currSlice->cof, 0, sizeof(currSlice->cof)); + + // CAVLC + if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC) + memzero48(p_Vid->nz_coeff[p_Vid->current_mb_nr]); +} + +void field_flag_inference(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + if (currMB->mb_avail_left) + { + currMB->mb_field = p_Vid->mb_data[currMB->mb_addr_left].mb_field; + } + else + { + // check top macroblock pair + currMB->mb_field = currMB->mb_avail_up ? 
p_Vid->mb_data[currMB->mb_addr_up].mb_field : FALSE; + } +} + + +static void skip_macroblock(Macroblock *currMB) +{ + short pred_mv[2]; + int zeroMotionAbove; + int zeroMotionLeft; + PixelPos mb[4]; // neighbor blocks + int i, j; + int a_mv_y = 0; + int a_ref_idx = 0; + int b_mv_y = 0; + int b_ref_idx = 0; + int img_block_y = currMB->block_y; + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + int list_offset = ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) ? (currMB->mbAddrX & 0x01) ? 4 : 2 : 0; + StorablePicture *dec_picture = p_Vid->dec_picture; + PicMotionParams *motion = &dec_picture->motion; + short *a_mv = NULL; + short *b_mv = NULL; + + get_neighbors0016(currMB, mb); + + if (mb[0].available) + { + a_mv = motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].mv; + a_mv_y = a_mv[1]; + a_ref_idx = motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx; + + if (currMB->mb_field && !p_Vid->mb_data[mb[0].mb_addr].mb_field) + { + a_mv_y /=2; + a_ref_idx *=2; + } + if (!currMB->mb_field && p_Vid->mb_data[mb[0].mb_addr].mb_field) + { + a_mv_y *=2; + a_ref_idx >>=1; + } + } + + if (mb[1].available) + { + b_mv = motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].mv; + b_mv_y = b_mv[1]; + b_ref_idx = motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx; + + if (currMB->mb_field && !p_Vid->mb_data[mb[1].mb_addr].mb_field) + { + b_mv_y /=2; + b_ref_idx *=2; + } + if (!currMB->mb_field && p_Vid->mb_data[mb[1].mb_addr].mb_field) + { + b_mv_y *=2; + b_ref_idx >>=1; + } + } + + zeroMotionLeft = !mb[0].available ? 1 : a_ref_idx==0 && a_mv[0]==0 && a_mv_y==0 ? 1 : 0; + zeroMotionAbove = !mb[1].available ? 1 : b_ref_idx==0 && b_mv[0]==0 && b_mv_y==0 ? 
1 : 0; + + currMB->cbp = 0; + reset_coeffs(currSlice); + + if (zeroMotionAbove || zeroMotionLeft) + { + for(j = img_block_y; j < img_block_y + BLOCK_SIZE; ++j) + { + for(i=currMB->block_x;i<currMB->block_x + BLOCK_SIZE; ++i) + { + memset(&motion->motion[LIST_0][j][i].mv, 0, sizeof(MotionVector)); + motion->motion[LIST_0][j][i].ref_idx=0; + motion->motion[LIST_0][j][i].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][0]; + } + } + } + else + { + currMB->GetMVPredictor (currMB, mb, pred_mv, 0, motion->motion[LIST_0], 0, 0, MB_BLOCK_SIZE, MB_BLOCK_SIZE); + + // Set first block line (position img_block_y) + for(j=img_block_y; j < img_block_y + BLOCK_SIZE; ++j) + { + for(i=currMB->block_x;i<currMB->block_x + BLOCK_SIZE; ++i) + { + memcpy(&motion->motion[LIST_0][j][i].mv, pred_mv, sizeof(MotionVector)); + motion->motion[LIST_0][j][i].ref_idx=0; + motion->motion[LIST_0][j][i].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][0]; + } + } + } +} + +static void concealIPCMcoeffs(Macroblock *currMB) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + int i, j, k; + + for(i=0;i<MB_BLOCK_SIZE;++i) + { + for(j=0;j<MB_BLOCK_SIZE;++j) + { + currSlice->ipcm[0][i][j] = p_Vid->dc_pred_value_comp[0]; + } + } + + if ((dec_picture->chroma_format_idc != YUV400) && !IS_INDEPENDENT(p_Vid)) + { + for (k = 0; k < 2; ++k) + { + for(i=0;i<p_Vid->mb_cr_size_y;++i) + { + for(j=0;j<p_Vid->mb_cr_size_x;++j) + { + currSlice->ipcm[k][i][j] = p_Vid->dc_pred_value_comp[k]; + } + } + } + } +} + +/*! 
************************************************************************
* \brief
*    Get the syntax elements from the NAL
*
*    I-slice variant: reads the (optional) field decoding flag and mb_type,
*    then the transform size flag, intra prediction modes and CBP/coefficients,
*    or the IPCM samples. The order of the reads follows the bitstream and
*    must not be changed.
************************************************************************
*/
static void read_one_macroblock_i_slice(Macroblock *currMB)
{
	Slice *currSlice = currMB->p_Slice;
	VideoParameters *p_Vid = currMB->p_Vid;

	SyntaxElement currSE;
	int mb_nr = currMB->mbAddrX;

	DataPartition *dP;
	const byte *partMap = assignSE2partition[currSlice->dp_mode];
	StorablePicture *dec_picture = p_Vid->dec_picture;
	PicMotionParams *motion = &dec_picture->motion;

	// default: even-addressed MBs start as frame MBs, odd ones inherit from the MB before them
	currMB->mb_field = ((mb_nr&0x01) == 0)? FALSE : p_Vid->mb_data[mb_nr-1].mb_field;

	update_qp(currMB, p_Vid->qp);

	// read MB mode *****************************************************************
	dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);

	// currSE.mapping is only set (and only used) on the CAVLC path
	if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
		currSE.mapping = linfo_ue;

	// read MB aff
	if (currSlice->mb_aff_frame_flag && (mb_nr&0x01)==0)
	{
		TRACE_STRING("mb_field_decoding_flag");
		if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
		{
			currMB->mb_field = readSyntaxElement_FLC(dP->bitstream, 1);
		}
		else
		{
			currMB->mb_field = readFieldModeInfo_CABAC(currMB, &dP->de_cabac);
		}
	}

	if(p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
	{
		CheckAvailabilityOfNeighborsCABAC(currMB);

		// read MB type
		currMB->mb_type = readMB_typeInfo_CABAC(currMB, &dP->de_cabac);
	}
	else
	{ // CAVLC
		// read MB type
		readSyntaxElement_UVLC(&currSE, dP);
		currMB->mb_type = currSE.value1;
	}



	currMB->ei_flag = 0;

	motion->mb_field[mb_nr] = (byte) currMB->mb_field;

	// 4x4-block row of this MB; for field MBs in MBAFF it is recomputed from block_y
	// (bottom MB of the pair: (block_y - 4) >> 1, top MB: block_y >> 1)
	currMB->block_y_aff = ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) ? (mb_nr&0x01) ? (currMB->block_y - 4)>>1 : currMB->block_y >> 1 : currMB->block_y;

	p_Vid->siblock[currMB->mb_y][currMB->mb_x] = 0;

	// slice-type specific translation of the raw mb_type value (function pointer set elsewhere)
	currSlice->interpret_mb_mode(currMB);

	//init NoMbPartLessThan8x8Flag
	currMB->NoMbPartLessThan8x8Flag = TRUE;

	//============= Transform Size Flag for INTRA MBs =============
	//-------------------------------------------------------------
	//transform size flag for INTRA_4x4 and INTRA_8x8 modes
	if (currMB->mb_type == I4MB && p_Vid->Transform8x8Mode)
	{
		dP = &(currSlice->partArr[partMap[SE_HEADER]]);
		TRACE_STRING("transform_size_8x8_flag");

		// read CAVLC transform_size_8x8_flag
		if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
		{
			currMB->luma_transform_size_8x8_flag = readSyntaxElement_FLC(dP->bitstream, 1);
		}
		else
		{
			currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &dP->de_cabac);
		}

		// flag set: the MB is re-labelled as I8MB and all four 8x8 modes follow
		if (currMB->luma_transform_size_8x8_flag)
		{
			currMB->mb_type = I8MB;
			memset(&currMB->b8mode, I8MB, 4 * sizeof(char));
			memset(&currMB->b8pdir, -1, 4 * sizeof(char));
		}
	}
	else
	{
		currMB->luma_transform_size_8x8_flag = FALSE;
	}

	//--- init macroblock data ---
	init_macroblock(currMB);

	if(currMB->mb_type != IPCM)
	{
		// intra prediction modes for a macroblock 4x4 **********************************************
		read_ipred_modes(currMB);

		// read CBP and Coeffs ***************************************************************
		currSlice->read_CBP_and_coeffs_from_NAL (currMB);
	}
	else
	{
		//read pcm_alignment_zero_bit and pcm_byte[i]

		// here dP is assigned with the same dP as SE_MBTYPE, because IPCM syntax is in the
		// same category as MBTYPE
		if ( currSlice->dp_mode && currSlice->dpB_NotPresent )
		{
			// data partitioning in use and partition B missing: substitute DC values
			concealIPCMcoeffs(currMB);
		}
		else
		{
			dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
			read_IPCM_coeffs_from_NAL(currSlice, dP);
		}
	}

	return;
}

/*!
************************************************************************
* \brief
*    Get the syntax elements from the NAL
*
*    P-slice variant: handles skip signalling (mb_skip_flag for CABAC,
*    mb_skip_run via p_Vid->cod_counter for CAVLC), the MBAFF field flag,
*    mb_type, sub-macroblock types, motion data, intra modes and
*    CBP/coefficients or IPCM samples. The order of the reads follows the
*    bitstream and must not be changed.
************************************************************************
*/
static void read_one_macroblock_p_slice(Macroblock *currMB)
{
	Slice *currSlice = currMB->p_Slice;
	VideoParameters *p_Vid = currMB->p_Vid;

	int i;

	SyntaxElement currSE;
	int mb_nr = currMB->mbAddrX;

	DataPartition *dP;
	const byte *partMap = assignSE2partition[currSlice->dp_mode];
	Macroblock *topMB = NULL;
	int prevMbSkipped = 0;
	int check_bottom, read_bottom, read_top;
	StorablePicture *dec_picture = p_Vid->dec_picture;
	PicMotionParams *motion = &dec_picture->motion;

	// MBAFF: for the bottom MB of a pair (odd address) remember whether the top MB was skipped
	if (currSlice->mb_aff_frame_flag)
	{
		if (mb_nr&0x01)
		{
			topMB= &p_Vid->mb_data[mb_nr-1];
			prevMbSkipped = (topMB->mb_type == 0);
		}
		else
			prevMbSkipped = 0;
	}

	// default: even-addressed MBs start as frame MBs, odd ones inherit from the MB before them
	currMB->mb_field = ((mb_nr&0x01) == 0)? FALSE : p_Vid->mb_data[mb_nr-1].mb_field;

	update_qp(currMB, p_Vid->qp);

	// read MB mode *****************************************************************
	dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);

	// currSE.mapping is only set (and only used) on the CAVLC path
	if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
		currSE.mapping = linfo_ue;

	if (p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
	{
		int skip;
		// read MB skip_flag
		if (currSlice->mb_aff_frame_flag && ((mb_nr&0x01) == 0||prevMbSkipped))
			field_flag_inference(currMB);

		CheckAvailabilityOfNeighborsCABAC(currMB);
		TRACE_STRING("mb_skip_flag");
		skip = readMB_skip_flagInfo_CABAC(currMB, &dP->de_cabac);

		// provisional mb_type: 0 = skipped, 1 = "not skipped, real type read below"
		currMB->mb_type = !skip;
		currMB->skip_flag = skip;

		currMB->ei_flag = 0;

		// read MB AFF
		if (currSlice->mb_aff_frame_flag)
		{
			check_bottom=read_bottom=read_top=0;
			if ((mb_nr&0x01)==0)
			{
				check_bottom = currMB->skip_flag;
				read_top = !check_bottom;
			}
			else
			{
				// topMB is non-NULL here: mb_aff_frame_flag is set and mb_nr is odd
				read_bottom = (topMB->skip_flag && (!currMB->skip_flag));
			}

			if (read_bottom || read_top)
			{
				TRACE_STRING("mb_field_decoding_flag");
				currMB->mb_field = readFieldModeInfo_CABAC(currMB, &dP->de_cabac);
			}
			if (check_bottom)
				check_next_mb_and_get_field_mode_CABAC(currSlice, dP);

			// mb_field may have changed, so the neighbour availability is refreshed
			CheckAvailabilityOfNeighborsCABAC(currMB);
		}

		// read MB type
		if (currMB->mb_type != 0 )
		{
			TRACE_STRING("mb_type");
			currMB->mb_type = readMB_typeInfo_CABAC(currMB, &dP->de_cabac);
			currMB->ei_flag = 0;
		}
	}
	// VLC Non-Intra
	else
	{
		// cod_counter == -1 means no skip run is pending: read a new mb_skip_run
		if(p_Vid->cod_counter == -1)
		{
			TRACE_STRING("mb_skip_run");
			readSyntaxElement_UVLC(&currSE, dP);
			p_Vid->cod_counter = currSE.value1;
		}
		if (p_Vid->cod_counter==0)
		{
			// read MB aff
			if ((currSlice->mb_aff_frame_flag) && (((mb_nr&0x01)==0) || ((mb_nr&0x01) && prevMbSkipped)))
			{
				TRACE_STRING("mb_field_decoding_flag");
				currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
			}

			// read MB type
			TRACE_STRING("mb_type");
			readSyntaxElement_UVLC(&currSE, dP);
			// P/SP slices: the parsed value is shifted by one because 0 is reserved for skip
			if(currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE)
				++(currSE.value1);
			currMB->mb_type = currSE.value1;
			currMB->ei_flag = 0;
			p_Vid->cod_counter--;
			currMB->skip_flag = 0;
		}
		else
		{
			// inside a skip run
			p_Vid->cod_counter--;
			currMB->mb_type = 0;
			currMB->ei_flag = 0;
			currMB->skip_flag = 1;

			// read field flag of bottom block
			if(currSlice->mb_aff_frame_flag)
			{
				if(p_Vid->cod_counter == 0 && ((mb_nr&0x01) == 0))
				{
					TRACE_STRING("mb_field_decoding_flag (of coded bottom mb)");
					currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
					// peek only: the bit is un-read so the bottom MB can parse it again
					dP->bitstream->frame_bitoffset--;
					TRACE_DECBITS(1);
				}
				else if (p_Vid->cod_counter > 0 && ((mb_nr & 0x01) == 0))
				{
					// check left macroblock pair first
					if (mb_is_available(mb_nr - 2, currMB) && ((mb_nr % (p_Vid->PicWidthInMbs * 2))!=0))
					{
						currMB->mb_field = p_Vid->mb_data[mb_nr-2].mb_field;
					}
					else
					{
						// check top macroblock pair
						if (mb_is_available(mb_nr - 2*p_Vid->PicWidthInMbs, currMB))
						{
							currMB->mb_field = p_Vid->mb_data[mb_nr-2*p_Vid->PicWidthInMbs].mb_field;
						}
						else
							currMB->mb_field = FALSE;
					}
				}
			}
		}
	}

	motion->mb_field[mb_nr] = (byte) currMB->mb_field;

	// 4x4-block row of this MB; for field MBs in MBAFF it is recomputed from block_y
	currMB->block_y_aff = ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) ? (mb_nr&0x01) ? (currMB->block_y - 4)>>1 : currMB->block_y >> 1 : currMB->block_y;

	p_Vid->siblock[currMB->mb_y][currMB->mb_x] = 0;

	currSlice->interpret_mb_mode(currMB);

	// field MBs in MBAFF double the number of active reference indices
	// NOTE(review): the matching restore is not in this function - presumably done by the caller; confirm
	if(currSlice->mb_aff_frame_flag)
	{
		if(currMB->mb_field)
		{
			currSlice->num_ref_idx_l0_active <<=1;
			currSlice->num_ref_idx_l1_active <<=1;
		}
	}

	//init NoMbPartLessThan8x8Flag
	currMB->NoMbPartLessThan8x8Flag = (IS_DIRECT(currMB) && !(p_Vid->active_sps->direct_8x8_inference_flag))? FALSE: TRUE;

	//====== READ 8x8 SUB-PARTITION MODES (modes of 8x8 blocks) and Intra VBST block modes ======
	if (currMB->mb_type == P8x8)
	{
		dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);

		if (p_Vid->active_pps->entropy_coding_mode_flag ==CAVLC)
		{
			currSE.mapping = linfo_ue;
			for (i = 0; i < 4; ++i)
			{
				TRACE_STRING("sub_mb_type");
				readSyntaxElement_UVLC(&currSE, dP);
				SetB8Mode (currMB, currSE.value1, i);

				//set NoMbPartLessThan8x8Flag for P8x8 mode
				currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
					(currMB->b8mode[i]==4);
			}
		}
		else
		{
			for (i = 0; i < 4; ++i)
			{
				int value = readB8_typeInfo_CABAC(currSlice, &dP->de_cabac);
				SetB8Mode (currMB, value, i);

				//set NoMbPartLessThan8x8Flag for P8x8 mode
				currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
					(currMB->b8mode[i]==4);
			}
		}

		//--- init macroblock data ---
		init_macroblock (currMB);
		currSlice->read_motion_info_from_NAL (currMB);
	}

	//============= Transform Size Flag for INTRA MBs =============
	//-------------------------------------------------------------
	//transform size flag for INTRA_4x4 and INTRA_8x8 modes
	if (currMB->mb_type == I4MB && p_Vid->Transform8x8Mode)
	{
		dP = &(currSlice->partArr[partMap[SE_HEADER]]);
		TRACE_STRING("transform_size_8x8_flag");

		// read CAVLC transform_size_8x8_flag
		if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
		{
			currMB->luma_transform_size_8x8_flag = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
		}
		else
		{
			currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &dP->de_cabac);
		}

		if (currMB->luma_transform_size_8x8_flag)
		{
			currMB->mb_type = I8MB;
			memset(&currMB->b8mode, I8MB, 4 * sizeof(char));
			memset(&currMB->b8pdir, -1, 4 * sizeof(char));
		}
	}
	else
	{
		currMB->luma_transform_size_8x8_flag = FALSE;
	}

	if(p_Vid->active_pps->constrained_intra_pred_flag)
	{
		if( !IS_INTRA(currMB) )
		{
			p_Vid->intra_block[mb_nr] = 0;
		}
	}

	//--- init macroblock data ---
	// P8x8 macroblocks were already initialised above, before their motion data was read
	if (currMB->mb_type != P8x8)
		init_macroblock(currMB);

	if (IS_SKIP (currMB)) //keep last macroblock
	{
		skip_macroblock(currMB);
	}
	else if(currMB->mb_type != IPCM)
	{
		// intra prediction modes for a macroblock 4x4 **********************************************
		if (IS_INTRA(currMB))
			read_ipred_modes(currMB);

		// read inter frame vector data *********************************************************
		if (IS_INTERMV (currMB) && (currMB->mb_type != P8x8))
		{
			currSlice->read_motion_info_from_NAL (currMB);
		}
		// read CBP and Coeffs ***************************************************************
		currSlice->read_CBP_and_coeffs_from_NAL (currMB);
	}
	else
	{
		//read pcm_alignment_zero_bit and pcm_byte[i]

		// here dP is assigned with the same dP as SE_MBTYPE, because IPCM syntax is in the
		// same category as MBTYPE
		if ( currSlice->dp_mode && currSlice->dpB_NotPresent )
		{
			concealIPCMcoeffs(currMB);
		}
		else
		{
			dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
			read_IPCM_coeffs_from_NAL(currSlice, dP);
		}
	}

	return;
}

/*!
************************************************************************
* \brief
*    Get the syntax elements from the NAL
*
*    B-slice variant. Same overall sequence as the P-slice reader, plus the
*    handling of skipped/direct macroblocks through p_Vid->cod_counter.
*    The order of the reads follows the bitstream and must not be changed.
************************************************************************
*/
static void read_one_macroblock_b_slice(Macroblock *currMB)
{
	Slice *currSlice = currMB->p_Slice;
	VideoParameters *p_Vid = currMB->p_Vid;
	int i;

	SyntaxElement currSE;
	int mb_nr = currMB->mbAddrX;

	DataPartition *dP;
	const byte *partMap = assignSE2partition[currSlice->dp_mode];
	Macroblock *topMB = NULL;
	int prevMbSkipped = 0;
	int check_bottom, read_bottom, read_top;
	StorablePicture *dec_picture = p_Vid->dec_picture;
	PicMotionParams *motion = &dec_picture->motion;

	// MBAFF: for the bottom MB of a pair (odd address) remember whether the top MB was skipped
	if (currSlice->mb_aff_frame_flag)
	{
		if (mb_nr&0x01)
		{
			topMB= &p_Vid->mb_data[mb_nr-1];
			prevMbSkipped = topMB->skip_flag;
		}
		else
			prevMbSkipped = 0;
	}

	// default: even-addressed MBs start as frame MBs, odd ones inherit from the MB before them
	currMB->mb_field = ((mb_nr&0x01) == 0)? FALSE : p_Vid->mb_data[mb_nr-1].mb_field;

	update_qp(currMB, p_Vid->qp);

	// read MB mode *****************************************************************
	dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);

	// currSE.mapping is only set (and only used) on the CAVLC path
	if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
		currSE.mapping = linfo_ue;

	if (p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
	{
		// read MB skip_flag
		int skip;
		if (currSlice->mb_aff_frame_flag && ((mb_nr&0x01) == 0||prevMbSkipped))
			field_flag_inference(currMB);

		CheckAvailabilityOfNeighborsCABAC(currMB);
		TRACE_STRING("mb_skip_flag");
		skip = readMB_skip_flagInfo_CABAC(currMB, &dP->de_cabac);

		// provisional mb_type: 0 = skipped, 1 = "not skipped, real type read below"
		currMB->mb_type = !skip;
		currMB->skip_flag = skip;

		currMB->cbp = !skip;

		currMB->ei_flag = 0;

		// a skipped MB sets cod_counter to 0, which selects the IS_DIRECT branch further down
		if (skip)
			p_Vid->cod_counter=0;

		// read MB AFF
		if (currSlice->mb_aff_frame_flag)
		{
			check_bottom=read_bottom=read_top=0;
			if ((mb_nr&0x01)==0)
			{
				check_bottom = currMB->skip_flag;
				read_top = !check_bottom;
			}
			else
			{
				// topMB is non-NULL here: mb_aff_frame_flag is set and mb_nr is odd
				read_bottom = (topMB->skip_flag && (!currMB->skip_flag));
			}

			if (read_bottom || read_top)
			{
				TRACE_STRING("mb_field_decoding_flag");
				currMB->mb_field = readFieldModeInfo_CABAC(currMB, &dP->de_cabac);
			}
			if (check_bottom)
				check_next_mb_and_get_field_mode_CABAC(currSlice,dP);

			// mb_field may have changed, so the neighbour availability is refreshed
			CheckAvailabilityOfNeighborsCABAC(currMB);
		}

		// read MB type
		if (currMB->mb_type != 0 )
		{
			TRACE_STRING("mb_type");
			currMB->mb_type = readMB_typeInfo_CABAC(currMB, &dP->de_cabac);
			currMB->ei_flag = 0;
		}
	}
	// VLC Non-Intra
	else
	{
		// cod_counter == -1 means no skip run is pending: read a new mb_skip_run
		if(p_Vid->cod_counter == -1)
		{
			TRACE_STRING("mb_skip_run");
			readSyntaxElement_UVLC(&currSE, dP);
			p_Vid->cod_counter = currSE.value1;
		}
		if (p_Vid->cod_counter==0)
		{
			// read MB aff
			if ((currSlice->mb_aff_frame_flag) && (((mb_nr&0x01)==0) || ((mb_nr&0x01) && prevMbSkipped)))
			{
				TRACE_STRING("mb_field_decoding_flag");
				currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
			}

			// read MB type
			TRACE_STRING("mb_type");
			readSyntaxElement_UVLC(&currSE, dP);
			// NOTE(review): this increment only applies to P/SP slices; it looks like a
			// carry-over from the P-slice reader and is presumably never taken here - confirm
			if(currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE)
				++(currSE.value1);
			currMB->mb_type = currSE.value1;
			currMB->ei_flag = 0;
			p_Vid->cod_counter--;
			currMB->skip_flag = 0;
		}
		else
		{
			// inside a skip run
			p_Vid->cod_counter--;
			currMB->mb_type = 0;
			currMB->ei_flag = 0;
			currMB->skip_flag = 1;

			// read field flag of bottom block
			if(currSlice->mb_aff_frame_flag)
			{
				if(p_Vid->cod_counter == 0 && ((mb_nr&0x01) == 0))
				{
					TRACE_STRING("mb_field_decoding_flag (of coded bottom mb)");
					currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
					// peek only: the bit is un-read so the bottom MB can parse it again
					dP->bitstream->frame_bitoffset--;
					TRACE_DECBITS(1);
				}
				else if (p_Vid->cod_counter > 0 && ((mb_nr & 0x01) == 0))
				{
					// check left macroblock pair first
					if (mb_is_available(mb_nr - 2, currMB) && ((mb_nr % (p_Vid->PicWidthInMbs * 2))!=0))
					{
						currMB->mb_field = p_Vid->mb_data[mb_nr-2].mb_field;
					}
					else
					{
						// check top macroblock pair
						if (mb_is_available(mb_nr - 2*p_Vid->PicWidthInMbs, currMB))
						{
							currMB->mb_field = p_Vid->mb_data[mb_nr-2*p_Vid->PicWidthInMbs].mb_field;
						}
						else
							currMB->mb_field = FALSE;
					}
				}
			}
		}
	}

	motion->mb_field[mb_nr] = (byte) currMB->mb_field;

	// 4x4-block row of this MB; for field MBs in MBAFF it is recomputed from block_y
	currMB->block_y_aff = ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) ? (mb_nr&0x01) ? (currMB->block_y - 4)>>1 : currMB->block_y >> 1 : currMB->block_y;

	p_Vid->siblock[currMB->mb_y][currMB->mb_x] = 0;

	currSlice->interpret_mb_mode(currMB);

	// field MBs in MBAFF double the number of active reference indices
	// NOTE(review): the matching restore is not in this function - presumably done by the caller; confirm
	if(currSlice->mb_aff_frame_flag)
	{
		if(currMB->mb_field)
		{
			currSlice->num_ref_idx_l0_active <<=1;
			currSlice->num_ref_idx_l1_active <<=1;
		}
	}

	//init NoMbPartLessThan8x8Flag
	currMB->NoMbPartLessThan8x8Flag = (IS_DIRECT(currMB) && !(p_Vid->active_sps->direct_8x8_inference_flag))? FALSE: TRUE;

	//====== READ 8x8 SUB-PARTITION MODES (modes of 8x8 blocks) and Intra VBST block modes ======
	if (currMB->mb_type == P8x8)
	{
		dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);

		if (p_Vid->active_pps->entropy_coding_mode_flag ==CAVLC)
		{
			currSE.mapping = linfo_ue;
			for (i = 0; i < 4; ++i)
			{
				TRACE_STRING("sub_mb_type");
				readSyntaxElement_UVLC(&currSE, dP);
				SetB8Mode (currMB, currSE.value1, i);

				//set NoMbPartLessThan8x8Flag for P8x8 mode
				currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
					(currMB->b8mode[i]==4);
			}
		}
		else
		{
			for (i = 0; i < 4; ++i)
			{
				int value = readB8_typeInfo_CABAC(currSlice, &dP->de_cabac);
				SetB8Mode (currMB, value, i);

				//set NoMbPartLessThan8x8Flag for P8x8 mode
				currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
					(currMB->b8mode[i]==4);
			}
		}

		//--- init macroblock data ---
		init_macroblock (currMB);
		currSlice->read_motion_info_from_NAL (currMB);
	}

	//============= Transform Size Flag for INTRA MBs =============
	//-------------------------------------------------------------
	//transform size flag for INTRA_4x4 and INTRA_8x8 modes
	if (currMB->mb_type == I4MB && p_Vid->Transform8x8Mode)
	{
		dP = &(currSlice->partArr[partMap[SE_HEADER]]);
		TRACE_STRING("transform_size_8x8_flag");

		// read CAVLC transform_size_8x8_flag
		if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
		{
			currMB->luma_transform_size_8x8_flag = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
		}
		else
		{
			currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &dP->de_cabac);
		}


		if (currMB->luma_transform_size_8x8_flag)
		{
			currMB->mb_type = I8MB;
			memset(&currMB->b8mode, I8MB, 4 * sizeof(char));
			memset(&currMB->b8pdir, -1, 4 * sizeof(char));
		}
	}
	else
	{
		currMB->luma_transform_size_8x8_flag = FALSE;
	}

	if(p_Vid->active_pps->constrained_intra_pred_flag) // inter frame
	{
		if( !IS_INTRA(currMB) )
		{
			p_Vid->intra_block[mb_nr] = 0;
		}
	}

	//--- init macroblock data ---
	// P8x8 macroblocks were already initialised above, before their motion data was read
	if (currMB->mb_type != P8x8)
		init_macroblock(currMB);

	// direct MB while cod_counter >= 0: nothing more is parsed for it
	if (IS_DIRECT (currMB) && p_Vid->cod_counter >= 0)
	{
		currMB->cbp = 0;
		reset_coeffs(currSlice);

		if (p_Vid->active_pps->entropy_coding_mode_flag ==CABAC)
			p_Vid->cod_counter=-1;
	}
	else if (IS_SKIP (currMB)) //keep last macroblock
	{
		skip_macroblock(currMB);
	}
	else if(currMB->mb_type != IPCM)
	{
		// intra prediction modes for a macroblock 4x4 **********************************************
		if (IS_INTRA(currMB))
			read_ipred_modes(currMB);

		// read inter frame vector data *********************************************************
		if (IS_INTERMV (currMB) && (currMB->mb_type != P8x8))
		{
			currSlice->read_motion_info_from_NAL (currMB);
		}
		// read CBP and Coeffs ***************************************************************
		currSlice->read_CBP_and_coeffs_from_NAL (currMB);
	}
	else
	{
		//read pcm_alignment_zero_bit and pcm_byte[i]

		// here dP is assigned with the same dP as SE_MBTYPE, because IPCM syntax is in the
		// same category as MBTYPE
		if ( currSlice->dp_mode && currSlice->dpB_NotPresent )
		{
			concealIPCMcoeffs(currMB);
		}
		else
		{
			dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
			read_IPCM_coeffs_from_NAL(currSlice, dP);
		}
	}

	return;
}


/*!
************************************************************************
* \brief
*    Initialize decoding engine after decoding an IPCM macroblock
*    (for IPCM CABAC 28/11/2003)
*
*    Restarts the arithmetic decoder of every partition in use (1 for
*    PAR_DP_1, 3 for PAR_DP_3) at that partition's current read_len.
*    Any other dp_mode prints a message and terminates the process.
*
* \author
*    Dong Wang <Dong.Wang@bristol.ac.uk>
************************************************************************
*/
static void init_decoding_engine_IPCM(Slice *currSlice)
{
	Bitstream *currStream;
	int ByteStartPosition;
	int PartitionNumber;
	int i;

	if(currSlice->dp_mode==PAR_DP_1)
		PartitionNumber=1;
	else if(currSlice->dp_mode==PAR_DP_3)
		PartitionNumber=3;
	else
	{
		// NOTE(review): exit() inside a decoder library takes the host process down
		printf("Partition Mode is not supported\n");
		exit(1);
	}

	for(i=0;i<PartitionNumber;++i)
	{
		currStream = currSlice->partArr[i].bitstream;
		ByteStartPosition = currStream->read_len;

		arideco_start_decoding (&currSlice->partArr[i].de_cabac, currStream->streamBuffer, ByteStartPosition, &currStream->read_len);
	}
}




/*!
+************************************************************************ +* \brief +* Read IPCM pcm_alignment_zero_bit and pcm_byte[i] from stream to currSlice->ipcm +* (for IPCM CABAC and IPCM CAVLC) +* +* \author +* Dong Wang <Dong.Wang@bristol.ac.uk> +************************************************************************ +*/ + +static void read_IPCM_coeffs_from_NAL(Slice *currSlice, struct datapartition *dP) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + + StorablePicture *dec_picture = p_Vid->dec_picture; + int i,j; + + //For CABAC, we don't need to read bits to let stream byte aligned + // because we have variable for integer bytes position + if(p_Vid->active_pps->entropy_coding_mode_flag == CABAC) + { + readIPCM_CABAC(currSlice, dP); + init_decoding_engine_IPCM(currSlice); + } + else + { + //read bits to let stream byte aligned + + if(((dP->bitstream->frame_bitoffset) & 0x07) != 0) + { + TRACE_STRING("pcm_alignment_zero_bit"); + readSyntaxElement_FLC(dP->bitstream, (8 - ((dP->bitstream->frame_bitoffset) & 0x07))); + } + + //read luma and chroma IPCM coefficients + TRACE_STRING("pcm_sample_luma"); + + for(i=0;i<MB_BLOCK_SIZE;++i) + { + for(j=0;j<MB_BLOCK_SIZE;++j) + { + currSlice->ipcm[0][i][j] = readSyntaxElement_FLC(dP->bitstream, p_Vid->bitdepth_luma); + } + } + if ((dec_picture->chroma_format_idc != YUV400) && !IS_INDEPENDENT(p_Vid)) + { + TRACE_STRING("pcm_sample_chroma (u)"); + for(i=0;i<p_Vid->mb_cr_size_y;++i) + { + for(j=0;j<p_Vid->mb_cr_size_x;++j) + { + currSlice->ipcm[1][i][j] = readSyntaxElement_FLC(dP->bitstream, p_Vid->bitdepth_chroma); + } + } + TRACE_STRING("pcm_sample_chroma (v)"); + for(i=0;i<p_Vid->mb_cr_size_y;++i) + { + for(j=0;j<p_Vid->mb_cr_size_x;++j) + { + currSlice->ipcm[2][i][j] = readSyntaxElement_FLC(dP->bitstream, p_Vid->bitdepth_chroma); + } + } + } + } +} + + +/*! +************************************************************************ +* \brief +* If data partition B is lost, conceal PCM sample values with DC. 
+* +************************************************************************ +*/ + + +static void __forceinline read_ipred_iblock(VideoParameters *p_Vid, Macroblock *currMB, Slice *currSlice, DataPartition *dP, int b8) +{ + int i, j; + int mostProbableIntraPredMode; + int upIntraPredMode; + int leftIntraPredMode; + int bx, by, bi, bj; + SyntaxElement currSE; + int ts, ls; + PixelPos left_block, top_block; + int dec; + + for(j=0;j<2;j++) //loop subblocks + { + by = (b8&2) + j; + bj = currMB->block_y + by; + for(i=0;i<2;i++) + { + int pred_mode; + bx = ((b8&1)<<1) + i; + bi = currMB->block_x + bx; + + //get from stream + if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC) + { + readSyntaxElement_Intra4x4PredictionMode(&currSE, dP->bitstream); + pred_mode = currSE.value1; + } + else + { + pred_mode = readIntraPredMode_CABAC(currSlice, &dP->de_cabac); + } + + p_Vid->getNeighbourXPLumaNB(currMB, (bx<<2) - 1, (by<<2), &left_block); + p_Vid->getNeighbourPXLumaNB(currMB, (bx<<2), (by<<2) - 1, &top_block ); + + //get from array and decode + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + left_block.available = left_block.available ? p_Vid->intra_block[left_block.mb_addr] : 0; + top_block.available = top_block.available ? p_Vid->intra_block[top_block.mb_addr] : 0; + } + + // !! KS: not sure if the following is still correct... + ts = ls = 0; // Check to see if the neighboring block is SI + if (currMB->mb_type == I4MB && currSlice->slice_type == SI_SLICE) // need support for MBINTLC1 + { + if (left_block.available) + if (p_Vid->siblock [left_block.mb_addr / p_Vid->PicWidthInMbs][left_block.mb_addr % p_Vid->PicWidthInMbs]) + ls=1; + + if (top_block.available) + if (p_Vid->siblock [top_block.mb_addr / p_Vid->PicWidthInMbs][top_block.mb_addr % p_Vid->PicWidthInMbs]) + ts=1; + } + + upIntraPredMode = (top_block.available &&(ts == 0)) ? p_Vid->ipredmode[top_block.pos_y>>2 ][top_block.pos_x>>2 ] : -1; + leftIntraPredMode = (left_block.available &&(ls == 0)) ? 
p_Vid->ipredmode[left_block.pos_y>>2][left_block.pos_x>>2] : -1; + + mostProbableIntraPredMode = (upIntraPredMode < 0 || leftIntraPredMode < 0) ? DC_PRED : upIntraPredMode < leftIntraPredMode ? upIntraPredMode : leftIntraPredMode; + + dec = (pred_mode == -1) ? mostProbableIntraPredMode : pred_mode + (pred_mode >= mostProbableIntraPredMode); + + + p_Vid->ipredmode[bj][bi] = dec; + } + } +} + +static void __forceinline read_ipred_i8mb(VideoParameters *p_Vid, Macroblock *currMB, Slice *currSlice, DataPartition *dP, int b8) +{ + int mostProbableIntraPredMode; + int upIntraPredMode; + int leftIntraPredMode; + int bx, by, bi, bj; + int pred_mode; + SyntaxElement currSE; + int ts, ls; + PixelPos left_block, top_block; + int dec; + + by = (b8&2); + bj = currMB->block_y + by; + + bx = ((b8&1)<<1); + bi = currMB->block_x + bx; + + //get from stream + if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC) + { + readSyntaxElement_Intra4x4PredictionMode(&currSE, dP->bitstream); + pred_mode = currSE.value1; + } + else + { + pred_mode = readIntraPredMode_CABAC(currSlice, &dP->de_cabac); + } + + p_Vid->getNeighbourXPLumaNB(currMB, (bx<<2) - 1, (by<<2), &left_block); + p_Vid->getNeighbourPXLumaNB(currMB, (bx<<2), (by<<2) - 1, &top_block ); + + //get from array and decode + + if (p_Vid->active_pps->constrained_intra_pred_flag) + { + left_block.available = left_block.available ? p_Vid->intra_block[left_block.mb_addr] : 0; + top_block.available = top_block.available ? p_Vid->intra_block[top_block.mb_addr] : 0; + } + + // !! KS: not sure if the following is still correct... 
+ ts = ls = 0; // Check to see if the neighboring block is SI + if (currMB->mb_type == I4MB && currSlice->slice_type == SI_SLICE) // need support for MBINTLC1 + { + if (left_block.available) + if (p_Vid->siblock [left_block.mb_addr / p_Vid->PicWidthInMbs][left_block.mb_addr % p_Vid->PicWidthInMbs]) + ls=1; + + if (top_block.available) + if (p_Vid->siblock [top_block.mb_addr / p_Vid->PicWidthInMbs][top_block.mb_addr % p_Vid->PicWidthInMbs]) + ts=1; + } + + upIntraPredMode = (top_block.available &&(ts == 0)) ? p_Vid->ipredmode[top_block.pos_y>>2 ][top_block.pos_x>>2 ] : -1; + leftIntraPredMode = (left_block.available &&(ls == 0)) ? p_Vid->ipredmode[left_block.pos_y>>2][left_block.pos_x>>2] : -1; + + mostProbableIntraPredMode = (upIntraPredMode < 0 || leftIntraPredMode < 0) ? DC_PRED : upIntraPredMode < leftIntraPredMode ? upIntraPredMode : leftIntraPredMode; + + dec = (pred_mode == -1) ? mostProbableIntraPredMode : pred_mode + (pred_mode >= mostProbableIntraPredMode); + + //set + p_Vid->ipredmode[bj][bi] = dec; + p_Vid->ipredmode[bj][bi+1] = dec; + p_Vid->ipredmode[bj+1][bi] = dec; + p_Vid->ipredmode[bj+1][bi+1] = dec; +} + +static void read_ipred_modes(Macroblock *currMB) +{ + int b8; + SyntaxElement currSE; + DataPartition *dP; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + VideoParameters *p_Vid = currMB->p_Vid; + + StorablePicture *dec_picture = p_Vid->dec_picture; + char IntraChromaPredModeFlag = IS_INTRA(currMB); + + dP = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]); + + for(b8 = 0; b8 < 4; ++b8) //loop 8x8 blocks + { + if (currMB->b8mode[b8]==IBLOCK) + { + IntraChromaPredModeFlag = 1; + read_ipred_iblock(p_Vid, currMB, currSlice, dP, b8); + } + else if (currMB->b8mode[b8]==I8MB) + { + IntraChromaPredModeFlag = 1; + read_ipred_i8mb(p_Vid, currMB, currSlice, dP, b8); + } + } + + if (IntraChromaPredModeFlag && (dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444)) + { 
+ TRACE_STRING("intra_chroma_pred_mode"); + dP = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]); + + if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC) + { + currSE.mapping = linfo_ue; + readSyntaxElement_UVLC(&currSE, dP); + currMB->c_ipred_mode = (char) currSE.value1; + } + else + { + currMB->c_ipred_mode = readCIPredMode_CABAC(currMB, &dP->de_cabac); + } + + + + if (currMB->c_ipred_mode < DC_PRED_8 || currMB->c_ipred_mode > PLANE_8) + { + error("illegal chroma intra pred mode!\n", 600); + } + } +} + + +/*! +************************************************************************ +* \brief +* Get current block spatial neighbors +************************************************************************ +*/ +void get_neighbors(Macroblock *currMB, // <-- current Macroblock + PixelPos *block, // <--> neighbor blocks + int mb_x, // <-- block x position + int mb_y, // <-- block y position + int blockshape_x // <-- block width + ) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int i; + p_Vid->getNeighbourXPLumaNB(currMB, mb_x - 1, mb_y , &block[0]); // left + p_Vid->getNeighbourPXLumaNB(currMB, mb_x, mb_y - 1, &block[1]); // up + p_Vid->getNeighbourPXLuma(currMB, mb_x + blockshape_x, mb_y - 1, &block[2]); // upper right + p_Vid->getNeighbourLuma(currMB, mb_x - 1, mb_y - 1, &block[3]); // upper left + for (i = 0; i < 4; i++) + { + block[i].pos_x >>= 2; + block[i].pos_y >>= 2; + } + + if (mb_y > 0) + { + if (mb_x < 8) // first column of 8x8 blocks + { + if (mb_y == 8 ) + { + if (blockshape_x == MB_BLOCK_SIZE) + block[2].available = 0; + } + else if (mb_x+blockshape_x == 8) + { + block[2].available = 0; + } + } + else if (mb_x + blockshape_x == MB_BLOCK_SIZE) + { + block[2].available = 0; + } + } + + if (!block[2].available) + { + block[2] = block[3]; + } +} + +/* this version is for mb_x == 0, mb_y == 0 and blockshape_x == 16 */ +void get_neighbors0016(Macroblock *currMB, // <-- current Macroblock + PixelPos *block // <--> neighbor blocks + ) +{ + VideoParameters 
*p_Vid = currMB->p_Vid; + int i; + + p_Vid->getNeighbourLeftLuma(currMB, &block[0]); // left + p_Vid->getNeighbourPXLumaNB(currMB, 0, -1, &block[1]); // up + p_Vid->getNeighbourPXLuma(currMB, 16, -1, &block[2]); // upper right + p_Vid->getNeighbourLuma(currMB, -1, -1, &block[3]); // upper left + for (i = 0; i < 4; i++) + { + if (block[i].available) + { + block[i].pos_x >>= 2; + block[i].pos_y >>= 2; + } + } + + if (!block[2].available) + { + block[2] = block[3]; + } +} + +/*! +************************************************************************ +* \brief +* Read motion info +************************************************************************ +*/ +static void read_motion_info_from_NAL_p_slice(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + + int mb_nr = currMB->mbAddrX; + + DataPartition *dP = NULL; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + int partmode = ((currMB->mb_type == P8x8) ? 4 : currMB->mb_type); + int step_h0 = BLOCK_STEP [partmode][0]; + int step_v0 = BLOCK_STEP [partmode][1]; + h264_ref_t *pic_num; + + int j4; + StorablePicture *dec_picture = p_Vid->dec_picture; + PicMotionParams *motion = &dec_picture->motion; + + int list_offset = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field))? (mb_nr&0x01) ? 4 : 2 : 0; + + if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC) + { + SyntaxElement currSE; + //===== READ REFERENCE PICTURE INDICES ===== + dP = &(currSlice->partArr[partMap[SE_REFFRAME]]); + // For LIST_0, if multiple ref. pictures, read LIST_0 reference picture indices for the MB *********** + prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l0_active, (currMB->mb_type != P8x8) || (!p_Vid->allrefzero)); + readMBRefPictureIdx (&currSE, dP, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0); + + // For LIST_1, if multiple ref. 
pictures, read LIST_1 reference picture indices for the MB *********** + prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l1_active, (currMB->mb_type != P8x8) || (!p_Vid->allrefzero)); + readMBRefPictureIdx (&currSE, dP, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0); + + //===== READ MOTION VECTORS ===== + dP = &(currSlice->partArr[partMap[SE_MVD]]); + + currSE.mapping = linfo_se; + readMBMotionVectors (&currSE, dP, currMB, LIST_0, step_h0, step_v0); + } + else + { + if (currMB->mb_type != P8x8 || !p_Vid->allrefzero) + { + //===== READ REFERENCE PICTURE INDICES ===== + dP = &(currSlice->partArr[partMap[SE_REFFRAME]]); + if (currSlice->num_ref_idx_l0_active > 1) + { + // For LIST_0, if multiple ref. pictures, read LIST_0 reference picture indices for the MB *********** + readMBRefPictureIdx_CABAC(dP, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0); + } + else + { + readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0); + } + + if (currSlice->num_ref_idx_l1_active > 1) + { + // For LIST_1, if multiple ref. 
pictures, read LIST_1 reference picture indices for the MB *********** + readMBRefPictureIdx_CABAC(dP, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0); + } + else + { + readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0); + } + } + else + { + readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0); + readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0); + } + //===== READ MOTION VECTORS ===== + dP = &(currSlice->partArr[partMap[SE_MVD]]); + + readMBMotionVectors_CABAC(dP, currMB, LIST_0, step_h0, step_v0); + + } + + // LIST_0 Motion vectors + + + // record reference picture Ids for deblocking decisions + pic_num = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset]; + for(j4 = currMB->block_y; j4 < (currMB->block_y +4);++j4) + { + PicMotion *ref = &motion->motion[LIST_0][j4][currMB->block_x]; + ref[0].ref_pic_id = (ref[0].ref_idx >= 0)?pic_num[(short)ref[0].ref_idx]:UNDEFINED_REFERENCE; + ref[1].ref_pic_id = (ref[1].ref_idx >= 0)?pic_num[(short)ref[1].ref_idx]:UNDEFINED_REFERENCE; + ref[2].ref_pic_id = (ref[2].ref_idx >= 0)?pic_num[(short)ref[2].ref_idx]:UNDEFINED_REFERENCE; + ref[3].ref_pic_id = (ref[3].ref_idx >= 0)?pic_num[(short)ref[3].ref_idx]:UNDEFINED_REFERENCE; + } +} + +/*! +************************************************************************ +* \brief +* Read motion info +************************************************************************ +*/ +static void read_motion_info_from_NAL_b_slice (Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + int i,j,k; + int mb_nr = currMB->mbAddrX; + DataPartition *dP = NULL; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + int partmode = ((currMB->mb_type == P8x8) ? 
4 : currMB->mb_type); + int step_h0 = BLOCK_STEP [partmode][0]; + int step_v0 = BLOCK_STEP [partmode][1]; + + int i0, j0, j6; + + int j4, i4, ii; + StorablePicture *dec_picture = p_Vid->dec_picture; + PicMotionParams *motion = &dec_picture->motion; + MotionParams *colocated; + + int mv_scale = 0; + + int list_offset = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field))? (mb_nr&0x01) ? 4 : 2 : 0; + + + if ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) + { + if(mb_nr&0x01) + { + colocated = &currSlice->p_colocated->bottom; + } + else + { + colocated = &currSlice->p_colocated->top; + } + } + else + { + colocated = &currSlice->p_colocated->frame; + } + + if (currMB->mb_type == P8x8) + { + if (currSlice->direct_spatial_mv_pred_flag) + { + char l0_rFrame, l1_rFrame; + short pmvl0[2]={0,0}, pmvl1[2]={0,0}; + + prepare_direct_params(currMB, dec_picture, pmvl0, pmvl1, &l0_rFrame, &l1_rFrame); + + for (k = 0; k < 4; ++k) + { + if (currMB->b8mode[k] == 0) + { + i = currMB->block_x + 2 * (k & 0x01); + for(j = 2 * (k >> 1); j < 2 * (k >> 1)+2;++j) + { + j6 = currMB->block_y_aff + j; + j4 = currMB->block_y + j; + for(i4 = i; i4 < i + 2; ++i4) + { + if (l0_rFrame >= 0) + { + if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + motion->motion[LIST_0][j4][i4].ref_idx = 0; + } + else + { + motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0]; + motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1]; + motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame; + } + } + else + { + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + motion->motion[LIST_0][j4][i4].ref_idx = -1; + } + + if (l1_rFrame >= 0) + { + if (l1_rFrame==0 && ((!colocated->moving_block[j6][i4])&& (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + motion->motion[LIST_1][j4][i4].mv[0] = 0; + 
motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + else + { + motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0]; + motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1]; + motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame; + } + } + else + { + motion->motion[LIST_1][j4][i4].mv[0] = 0; + motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = -1; + } + + if (l0_rFrame <0 && l1_rFrame <0) + { + motion->motion[LIST_0][j4][i4].ref_idx = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + } + } + } + } + } + else + { + for (k = 0; k < 4; ++k) // Scan all blocks + { + if (currMB->b8mode[k] == 0) + { + for(j0 = 2 * (k >> 1); j0 < 2 * (k >> 1) + 2; j0 += step_v0) + { + for(i0 = currMB->block_x + 2*(k & 0x01); i0 < currMB->block_x + 2 * (k & 0x01)+2; i0 += step_h0) + { + int refList = colocated->motion[LIST_0 ][currMB->block_y_aff + j0][i0].ref_idx== -1 ? LIST_1 : LIST_0; + int ref_idx = colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_idx; + int mapped_idx = -1, iref; + + if (ref_idx == -1) + { + for (j4 = currMB->block_y + j0; j4 < currMB->block_y + j0 + step_v0; ++j4) + { + int h; + for (h=0;h<step_h0;h++) + { + PicMotion *m0 = &motion->motion[LIST_0][j4][i0+h]; + PicMotion *m1 = &motion->motion[LIST_1][j4][i0+h]; + m0->ref_idx = 0; + m1->ref_idx = 0; + memset(&m0->mv, 0, sizeof(MotionVector)); + memset(&m1->mv, 0, sizeof(MotionVector)); + } + } + } + else + { + for (iref = 0; iref < imin(currSlice->num_ref_idx_l0_active, p_Vid->listXsize[LIST_0 + list_offset]); ++iref) + { + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + + if(p_Vid->structure==0 && curr_mb_field==0) + { + // If the current MB is a frame MB and the colocated is from a field picture, + // then the colocated->ref_pic_id may have been generated from the wrong value of + // frame_poc if it references it's complementary field, so test both POC values + if(p_Vid->listX[0][iref]->top_poc * 2 == 
colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_pic_id + || p_Vid->listX[0][iref]->bottom_poc * 2 == colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_pic_id) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + continue; + } + if (dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][iref]==colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_pic_id) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + } + + if (INVALIDINDEX == mapped_idx) + { + error("temporal direct error: colocated block has ref that is unavailable",-1111); + } + + for (j = j0; j < j0 + step_v0; ++j) + { + j4 = currMB->block_y + j; + j6 = currMB->block_y_aff + j; + + for (i4 = i0; i4 < i0 + step_h0; ++i4) + { + mv_scale = currSlice->mvscale[LIST_0 + list_offset][mapped_idx]; + + motion->motion[LIST_0][j4][i4].ref_idx = (char) mapped_idx; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + + if (mv_scale == 9999 || p_Vid->listX[LIST_0+list_offset][mapped_idx]->is_long_term) + { + for (ii=0; ii < 2; ++ii) + { + motion->motion[LIST_0][j4][i4].mv[ii] = colocated->motion[refList][j6][i4].mv[ii]; + motion->motion[LIST_1][j4][i4].mv[ii] = 0; + } + } + else + { + for (ii=0; ii < 2; ++ii) + { + motion->motion[LIST_0][j4][i4].mv[ii] = (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[ii] + 128 ) >> 8); + motion->motion[LIST_1][j4][i4].mv[ii] = (short) (motion->motion[LIST_0][j4][i4].mv[ii] - colocated->motion[refList][j6][i4].mv[ii]); + } + } + } + } + } + } + } + } + } + } + } + + + + if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC) + { + SyntaxElement currSE; + //===== READ REFERENCE PICTURE INDICES ===== + dP = &(currSlice->partArr[partMap[SE_REFFRAME]]); + // For LIST_0, if multiple ref. 
pictures, read LIST_0 reference picture indices for the MB *********** + prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l0_active, TRUE); + readMBRefPictureIdx (&currSE, dP, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0); + + // For LIST_1, if multiple ref. pictures, read LIST_1 reference picture indices for the MB *********** + prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l1_active, TRUE); + readMBRefPictureIdx (&currSE, dP, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0); + + //===== READ MOTION VECTORS ===== + dP = &(currSlice->partArr[partMap[SE_MVD]]); + + currSE.mapping = linfo_se; + // LIST_0 Motion vectors + readMBMotionVectors (&currSE, dP, currMB, LIST_0, step_h0, step_v0); + // LIST_1 Motion vectors + readMBMotionVectors (&currSE, dP, currMB, LIST_1, step_h0, step_v0); + } + else + { + //===== READ REFERENCE PICTURE INDICES ===== + dP = &(currSlice->partArr[partMap[SE_REFFRAME]]); + if (currSlice->num_ref_idx_l0_active>1) + { + // For LIST_0, if multiple ref. pictures, read LIST_0 reference picture indices for the MB *********** + readMBRefPictureIdx_CABAC(dP, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0); + } + else + { + readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0); + } + + if (currSlice->num_ref_idx_l1_active > 1) + { + // For LIST_1, if multiple ref. 
pictures, read LIST_1 reference picture indices for the MB *********** + readMBRefPictureIdx_CABAC(dP, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0); + } + else + { + readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0); + } + + //===== READ MOTION VECTORS ===== + dP = &(currSlice->partArr[partMap[SE_MVD]]); + + // LIST_0 Motion vectors + readMBMotionVectors_CABAC(dP, currMB, LIST_0, step_h0, step_v0); + // LIST_1 Motion vectors + readMBMotionVectors_CABAC(dP, currMB, LIST_1, step_h0, step_v0); + } + + + + // record reference picture Ids for deblocking decisions + + for (k = LIST_0; k <= LIST_1; ++k) + { + const h264_ref_t *rec_pic_num = dec_picture->ref_pic_num[p_Vid->current_slice_nr][k+list_offset]; + PicMotion **list_motion = &motion->motion[k][currMB->block_y]; + for(j4 = 0; j4 < 4 ;++j4) + { + PicMotion *m = &list_motion[j4][currMB->block_x]; + m[0].ref_pic_id = (m[0].ref_idx>=0)?rec_pic_num[(short)m[0].ref_idx]:UNDEFINED_REFERENCE; + m[1].ref_pic_id = (m[1].ref_idx>=0)?rec_pic_num[(short)m[1].ref_idx]:UNDEFINED_REFERENCE; + m[2].ref_pic_id = (m[2].ref_idx>=0)?rec_pic_num[(short)m[2].ref_idx]:UNDEFINED_REFERENCE; + m[3].ref_pic_id = (m[3].ref_idx>=0)?rec_pic_num[(short)m[3].ref_idx]:UNDEFINED_REFERENCE; + } + } +} + +/*! 
+************************************************************************ +* \brief +* Get the Prediction from the Neighboring Blocks for Number of +* Nonzero Coefficients +* +* Luma Blocks +************************************************************************ +*/ +static int predict_nnz_cb(Macroblock *currMB, int i,int j) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + PixelPos pix; + + int pred_nnz = 0; + int cnt = 0; + + // left block + p_Vid->getNeighbourLuma(currMB, i - 1, j, &pix); + + if (IS_INTRA(currMB) && pix.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz = p_Vid->nz_coeff [pix.mb_addr ][1][pix.y>>2][pix.x>>2]; + ++cnt; + } + + // top block + p_Vid->getNeighbourLuma(currMB, i, j - 1, &pix); + + if (IS_INTRA(currMB) && pix.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz += p_Vid->nz_coeff [pix.mb_addr ][1][pix.y>>2][pix.x>>2]; + ++cnt; + } + + if (cnt==2) + { + ++pred_nnz; + pred_nnz>>=1; + } + + return pred_nnz; +} + + +static int predict_nnz_cr(Macroblock *currMB, int i,int j) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + PixelPos pix; + + int pred_nnz = 0; + int cnt = 0; + + // left block + p_Vid->getNeighbourLuma(currMB, i - 1, j, &pix); + + if (IS_INTRA(currMB) && pix.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz = p_Vid->nz_coeff [pix.mb_addr ][2][pix.y>>2][pix.x>>2]; + ++cnt; + } + + // top block + p_Vid->getNeighbourLuma(currMB, i, j - 1, &pix); + + if (IS_INTRA(currMB) && pix.available && 
p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz += p_Vid->nz_coeff [pix.mb_addr ][2][pix.y>>2][pix.x>>2]; + ++cnt; + } + + if (cnt==2) + { + ++pred_nnz; + pred_nnz>>=1; + } + + return pred_nnz; +} + + +static int predict_nnz_luma(Macroblock *currMB, int i,int j) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + PixelPos pix; + + int pred_nnz = 0; + int cnt = 0; + + // left block + p_Vid->getNeighbourXPLuma(currMB, i - 1, j, &pix); + + if (pix.available) + { + pred_nnz = p_Vid->nz_coeff [pix.mb_addr ][0][pix.y>>2][pix.x>>2]; + ++cnt; + } + + // top block + p_Vid->getNeighbourPXLuma(currMB, i, j - 1, &pix); + + if (pix.available) + { + pred_nnz += p_Vid->nz_coeff [pix.mb_addr ][0][pix.y>>2][pix.x>>2]; + ++cnt; + } + + if (cnt==2) + { + ++pred_nnz; + pred_nnz>>=1; + } + + return pred_nnz; +} + + +static int predict_nnz_luma_intra(Macroblock *currMB, int i,int j) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + PixelPos pix; + + int pred_nnz = 0; + int cnt = 0; + + // left block + p_Vid->getNeighbourXPLuma(currMB, i - 1, j, &pix); + + if (pix.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz = p_Vid->nz_coeff [pix.mb_addr ][0][pix.y>>2][pix.x>>2]; + ++cnt; + } + + // top block + p_Vid->getNeighbourPXLuma(currMB, i, j - 1, &pix); + + if (pix.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz += p_Vid->nz_coeff [pix.mb_addr ][0][pix.y>>2][pix.x>>2]; + ++cnt; + } + + if (cnt==2) + { + ++pred_nnz; + pred_nnz>>=1; + } + + return pred_nnz; +} + + 
+/*! +************************************************************************ +* \brief +* Get the Prediction from the Neighboring Blocks for Number of +* Nonzero Coefficients +* +* Chroma Blocks +************************************************************************ +*/ +static int predict_nnz_chroma_inter(Macroblock *currMB, int i,int j) +{ + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + PixelPos pix; + + int pred_nnz = 0; + int cnt = 0; + + if (dec_picture->chroma_format_idc != YUV444) + { + //YUV420 and YUV422 + // left block + p_Vid->getNeighbour(currMB, ((i&0x01)<<2) - 1, j, p_Vid->mb_size[IS_CHROMA], &pix); + if (pix.available) + { + pred_nnz = p_Vid->nz_coeff [pix.mb_addr ][1][pix.y>>2][2 * (i>>1) + (pix.x>>2)]; + ++cnt; + } + + // top block + p_Vid->getNeighbour(currMB, ((i&0x01)<<2), j - 1, p_Vid->mb_size[IS_CHROMA], &pix); + if (pix.available) + { + pred_nnz += p_Vid->nz_coeff [pix.mb_addr ][1][pix.y>>2][2 * (i>>1) + (pix.x>>2)]; + ++cnt; + } + + if (cnt==2) + { + ++pred_nnz; + pred_nnz >>= 1; + } + } + + return pred_nnz; +} + + +static int predict_nnz_chroma_intra(Macroblock *currMB, int i,int j) +{ + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + PixelPos pix; + + int pred_nnz = 0; + int cnt = 0; + + if (dec_picture->chroma_format_idc != YUV444) + { + //YUV420 and YUV422 + // left block + p_Vid->getNeighbour(currMB, ((i&0x01)<<2) - 1, j, p_Vid->mb_size[IS_CHROMA], &pix); + + if (pix.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz = p_Vid->nz_coeff [pix.mb_addr ][1][pix.y>>2][2 * (i>>1) + (pix.x>>2)]; + ++cnt; + } + + // top block + p_Vid->getNeighbour(currMB, ((i&0x01)<<2), j - 1, p_Vid->mb_size[IS_CHROMA], &pix); + + if (pix.available && 
p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= p_Vid->intra_block[pix.mb_addr]; + if (!pix.available) + ++cnt; + } + + if (pix.available) + { + pred_nnz += p_Vid->nz_coeff [pix.mb_addr ][1][pix.y>>2][2 * (i>>1) + (pix.x>>2)]; + ++cnt; + } + + if (cnt==2) + { + ++pred_nnz; + pred_nnz >>= 1; + } + } + + return pred_nnz; +} + + +/*! +************************************************************************ +* \brief +* Reads coeff of an 4x4 block (CAVLC) +* +* \author +* Karl Lillevold <karll@real.com> +* contributions by James Au <james@ubvideo.com> +************************************************************************ +*/ +static void readCoeff4x4_CAVLC_Luma (Macroblock *currMB, + int i, int j, int levarr[16], int runarr[16], + int *number_coefficients) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int mb_nr = currMB->mbAddrX; + SyntaxElement currSE; + DataPartition *dP; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + Bitstream *currStream; + + int k, code, vlcnum; + int numcoeff = 0, numtrailingones, numcoeff_vlc; + int level_two_or_higher; + int numones, totzeros, abslevel; + int zerosleft; + int nnz; + static const int incVlc[] = {0,3,6,12,24,48,32768}; // maximum vlc = 6 + + p_Vid->nz_coeff[mb_nr][0][j][i] = 0; + + if (IS_INTRA (currMB)) + { + dP = &(currSlice->partArr[partMap[SE_LUM_AC_INTRA]]); + nnz = predict_nnz_luma_intra(currMB, i<<2, j<<2); + } + else + { + dP = &(currSlice->partArr[partMap[SE_LUM_AC_INTER]]); + nnz = predict_nnz_luma(currMB, i<<2, j<<2); + } + + if (nnz < 2) + { + numcoeff_vlc = 0; + } + else if (nnz < 4) + { + numcoeff_vlc = 1; + } + else if (nnz < 8) + { + numcoeff_vlc = 2; + } + else // + { + numcoeff_vlc = 3; + } + + currStream = dP->bitstream; + readSyntaxElement_NumCoeffTrailingOnes(&currSE, currStream, numcoeff_vlc); + + numcoeff = currSE.value1; + numtrailingones = currSE.value2; + + 
p_Vid->nz_coeff[mb_nr][0][j][i] = (byte) numcoeff; + + memzero64(levarr); + memzero64(runarr); + + numones = numtrailingones; + *number_coefficients = numcoeff; + + if (numcoeff) + { + if (numtrailingones) + { + code = readSyntaxElement_FLC(currStream, numtrailingones); + + for (k=0;k<numtrailingones;k++) + { +#ifdef _M_IX86 + levarr[k+numcoeff-numtrailingones] = ((_bittest((const long *)&code, k)<<1) ^ 0xFFFFFFFF) + 2; +#else + levarr[k+numcoeff-numtrailingones] = (code>>k)&1 ? -1:1; +#endif + } + } + + // decode levels + level_two_or_higher = (numcoeff > 3 && numtrailingones == 3)? 0 : 1; + vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 1 : 0; + + for (k = numcoeff - 1 - numtrailingones; k >= 0; k--) + { + int level; + if (vlcnum == 0) + level=readSyntaxElement_Level_VLC0(currStream); + else + level=readSyntaxElement_Level_VLCN(vlcnum, currStream); + + if (level_two_or_higher) + { + level += (level > 0) ? 1 : -1; + level_two_or_higher = 0; + } + + levarr[k] = level; + abslevel = iabs(levarr[k]); + if (abslevel == 1) + ++numones; + + // update VLC table + if (abslevel > incVlc[vlcnum]) + ++vlcnum; + + if (k == numcoeff - 1 - numtrailingones && abslevel >3) + vlcnum = 2; + } + + if (numcoeff < 16) + { + // decode total run + vlcnum = numcoeff - 1; + totzeros = readSyntaxElement_TotalZeros(currStream, vlcnum); + } + else + { + totzeros = 0; + } + + // decode run before each coefficient + zerosleft = totzeros; + i = numcoeff - 1; + + if (zerosleft > 0 && i > 0) + { + do + { + // select VLC for runbefore + vlcnum = imin(zerosleft - 1, RUNBEFORE_NUM_M1); + + runarr[i] = readSyntaxElement_Run(currStream, vlcnum); + + zerosleft -= runarr[i]; + i --; + } while (zerosleft != 0 && i != 0); + } + runarr[i] = zerosleft; + } // if numcoeff +} + + +static void readCoeff4x4_CAVLC_ChromaAC(Macroblock *currMB, + int i, int j, int levarr[16], int runarr[16], + int *number_coefficients) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int mb_nr 
= currMB->mbAddrX; + SyntaxElement currSE; + DataPartition *dP; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + Bitstream *currStream; + + int k, code, vlcnum; + int numcoeff = 0, numtrailingones, numcoeff_vlc; + int level_two_or_higher; + int numones, totzeros, abslevel; + int zerosleft, ntr; + int nnz; + static const int incVlc[] = {0,3,6,12,24,48,32768}; // maximum vlc = 6 + + TRACE_PRINTF("ChrDC"); + p_Vid->nz_coeff[mb_nr][0][j][i] = 0; + + if (IS_INTRA (currMB)) + { + dP = &(currSlice->partArr[partMap[SE_CHR_AC_INTRA]]); + nnz = predict_nnz_chroma_intra(currMB, i, ((j-4)<<2)); + } + else + { + dP = &(currSlice->partArr[partMap[SE_CHR_AC_INTER]]); + nnz = predict_nnz_chroma_inter(currMB, i, ((j-4)<<2)); + } + currStream = dP->bitstream; + + + // luma or chroma AC + + if (nnz < 2) + { + numcoeff_vlc = 0; + } + else if (nnz < 4) + { + numcoeff_vlc = 1; + } + else if (nnz < 8) + { + numcoeff_vlc = 2; + } + else // + { + numcoeff_vlc = 3; + } + + readSyntaxElement_NumCoeffTrailingOnes(&currSE, currStream, numcoeff_vlc); + + numcoeff = currSE.value1; + numtrailingones = currSE.value2; + + + p_Vid->nz_coeff[mb_nr][0][j][i] = (byte) numcoeff; + + memzero64(levarr); + memzero64(runarr); + + numones = numtrailingones; + *number_coefficients = numcoeff; + + if (numcoeff) + { + if (numtrailingones) + { + code = readSyntaxElement_FLC (currStream, numtrailingones); + + ntr = numtrailingones; + for (k = numcoeff - 1; k > numcoeff - 1 - numtrailingones; k--) + { + ntr --; + levarr[k] = (code>>ntr)&1 ? -1 : 1; + } + } + + // decode levels + level_two_or_higher = (numcoeff > 3 && numtrailingones == 3)? 0 : 1; + vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 
1 : 0; + + for (k = numcoeff - 1 - numtrailingones; k >= 0; k--) + { + +#if TRACE + snprintf(currSE.tracestring, + TRACESTRING_SIZE, "%s lev (%d,%d) k=%d vlc=%d ", type, i, j, k, vlcnum); +#endif + + int level; + if (vlcnum == 0) + level=readSyntaxElement_Level_VLC0(currStream); + else + level=readSyntaxElement_Level_VLCN(vlcnum, currStream); + + if (level_two_or_higher) + { + level += (level > 0) ? 1 : -1; + level_two_or_higher = 0; + } + + levarr[k] = level; + abslevel = iabs(levarr[k]); + if (abslevel == 1) + ++numones; + + // update VLC table + if (abslevel > incVlc[vlcnum]) + ++vlcnum; + + if (k == numcoeff - 1 - numtrailingones && abslevel >3) + vlcnum = 2; + } + + if (numcoeff < 15) + { + // decode total run + vlcnum = numcoeff - 1; + totzeros = readSyntaxElement_TotalZeros(currStream, vlcnum); + } + else + { + totzeros = 0; + } + + // decode run before each coefficient + zerosleft = totzeros; + i = numcoeff - 1; + + if (zerosleft > 0 && i > 0) + { + do + { + // select VLC for runbefore + vlcnum = imin(zerosleft - 1, RUNBEFORE_NUM_M1); + + runarr[i] = readSyntaxElement_Run(currStream, vlcnum); + + zerosleft -= runarr[i]; + i --; + } while (zerosleft != 0 && i != 0); + } + runarr[i] = zerosleft; + } // if numcoeff +} + +static void readCoeff4x4_CAVLC_ChromaDC(Macroblock *currMB, int i, int j, int levarr[16], int runarr[16], int *number_coefficients) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int mb_nr = currMB->mbAddrX; + SyntaxElement currSE; + DataPartition *dP; + Bitstream *currStream; + + int k, code, vlcnum; + int numcoeff = 0, numtrailingones; + int level_two_or_higher; + int numones, totzeros, abslevel; + int zerosleft, ntr; + int max_coeff_num; + static const int incVlc[] = {0,3,6,12,24,48,32768}; // maximum vlc = 6 + + max_coeff_num = p_Vid->num_cdc_coeff; + TRACE_PRINTF("ChrDC"); + p_Vid->nz_coeff[mb_nr][0][j][i] = 0; + if (IS_INTRA (currMB)) + dP = 
&(currSlice->partArr[assignSE2partition[currSlice->dp_mode][SE_CHR_DC_INTRA]]); + else + dP = &(currSlice->partArr[assignSE2partition[currSlice->dp_mode][SE_CHR_DC_INTER]]); + currStream = dP->bitstream; + + readSyntaxElement_NumCoeffTrailingOnesChromaDC(p_Vid, &currSE, currStream); + + numcoeff = currSE.value1; + numtrailingones = currSE.value2; + + memzero64(levarr); + memzero64(runarr); + + numones = numtrailingones; + *number_coefficients = numcoeff; + + if (numcoeff) + { + if (numtrailingones) + { + code = readSyntaxElement_FLC (currStream, numtrailingones); + + ntr = numtrailingones; + for (k = numcoeff - 1; k > numcoeff - 1 - numtrailingones; k--) + { + ntr --; + levarr[k] = (code>>ntr)&1 ? -1 : 1; + } + } + + // decode levels + level_two_or_higher = (numcoeff > 3 && numtrailingones == 3)? 0 : 1; + vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 1 : 0; + + for (k = numcoeff - 1 - numtrailingones; k >= 0; k--) + { + int level; + if (vlcnum == 0) + level=readSyntaxElement_Level_VLC0(currStream); + else + level=readSyntaxElement_Level_VLCN(vlcnum, currStream); + + if (level_two_or_higher) + { + level += (level > 0) ? 
1 : -1; + level_two_or_higher = 0; + } + + levarr[k] = level; + abslevel = iabs(levarr[k]); + if (abslevel == 1) + ++numones; + + // update VLC table + if (abslevel > incVlc[vlcnum]) + ++vlcnum; + + if (k == numcoeff - 1 - numtrailingones && abslevel >3) + vlcnum = 2; + } + + if (numcoeff < max_coeff_num) + { + // decode total run + vlcnum = numcoeff - 1; + totzeros = readSyntaxElement_TotalZerosChromaDC(p_Vid, currStream, vlcnum); + } + else + { + totzeros = 0; + } + + // decode run before each coefficient + zerosleft = totzeros; + i = numcoeff - 1; + + if (zerosleft > 0 && i > 0) + { + do + { + // select VLC for runbefore + vlcnum = imin(zerosleft - 1, RUNBEFORE_NUM_M1); + + runarr[i] = readSyntaxElement_Run(currStream, vlcnum); + + zerosleft -= runarr[i]; + i --; + } while (zerosleft != 0 && i != 0); + } + runarr[i] = zerosleft; + } // if numcoeff +} + +static void readCoeff4x4_CAVLC(Macroblock *currMB, int block_type, int i, int j, int levarr[16], int runarr[16], int *number_coefficients) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int mb_nr = currMB->mbAddrX; + SyntaxElement currSE; + DataPartition *dP; + Bitstream *currStream; + + int k, code, vlcnum; + int numcoeff = 0, numtrailingones, numcoeff_vlc; + int level_two_or_higher; + int numones, totzeros, abslevel; + int zerosleft, ntr, dptype = 0; + int max_coeff_num, nnz; + static const int incVlc[] = {0,3,6,12,24,48,32768}; // maximum vlc = 6 + + switch (block_type) + { + case LUMA: + readCoeff4x4_CAVLC_Luma(currMB, i, j, levarr, runarr, number_coefficients); + return; + case LUMA_INTRA16x16DC: + max_coeff_num = 16; + TRACE_PRINTF("Lum16DC"); + dptype = SE_LUM_DC_INTRA; + p_Vid->nz_coeff[mb_nr][0][j][i] = 0; + break; + case LUMA_INTRA16x16AC: + max_coeff_num = 15; + TRACE_PRINTF("Lum16AC"); + dptype = SE_LUM_AC_INTRA; + p_Vid->nz_coeff[mb_nr][0][j][i] = 0; + break; + case CB: + max_coeff_num = 16; + TRACE_PRINTF("Luma_add1"); + dptype = (IS_INTRA (currMB)) ? 
SE_LUM_AC_INTRA : SE_LUM_AC_INTER; + p_Vid->nz_coeff[mb_nr][1][j][i] = 0; + break; + case CB_INTRA16x16DC: + max_coeff_num = 16; + TRACE_PRINTF("Luma_add1_16DC"); + dptype = SE_LUM_DC_INTRA; + p_Vid->nz_coeff[mb_nr][1][j][i] = 0; + break; + case CB_INTRA16x16AC: + max_coeff_num = 15; + TRACE_PRINTF("Luma_add1_16AC"); + dptype = SE_LUM_AC_INTRA; + p_Vid->nz_coeff[mb_nr][1][j][i] = 0; + break; + case CR: + max_coeff_num = 16; + TRACE_PRINTF("Luma_add2"); + dptype = (IS_INTRA (currMB)) ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER; + p_Vid->nz_coeff[mb_nr][2][j][i] = 0; + break; + case CR_INTRA16x16DC: + max_coeff_num = 16; + TRACE_PRINTF("Luma_add2_16DC"); + dptype = SE_LUM_DC_INTRA; + p_Vid->nz_coeff[mb_nr][2][j][i] = 0; + break; + case CR_INTRA16x16AC: + max_coeff_num = 15; + TRACE_PRINTF("Luma_add1_16AC"); + dptype = SE_LUM_AC_INTRA; + p_Vid->nz_coeff[mb_nr][2][j][i] = 0; + break; + case CHROMA_DC: + readCoeff4x4_CAVLC_ChromaDC(currMB, i, j, levarr, runarr, number_coefficients); + return; + case CHROMA_AC: + readCoeff4x4_CAVLC_ChromaAC(currMB, i, j, levarr, runarr, number_coefficients); + return; + default: + error ("readCoeff4x4_CAVLC: invalid block type", 600); + p_Vid->nz_coeff[mb_nr][0][j][i] = 0; + break; + } + + dP = &(currSlice->partArr[assignSE2partition[currSlice->dp_mode][dptype]]); + currStream = dP->bitstream; + + // luma or chroma AC + if(block_type==LUMA_INTRA16x16DC || block_type==LUMA_INTRA16x16AC) + { + nnz = predict_nnz_luma_intra(currMB, i<<2, j<<2); + } + else if (block_type==CB || block_type==CB_INTRA16x16DC || block_type==CB_INTRA16x16AC) + { + nnz = predict_nnz_cb(currMB, i<<2, j<<2); + } + else + { + nnz = predict_nnz_cr(currMB, i<<2, j<<2); + } + + if (nnz < 2) + { + numcoeff_vlc = 0; + } + else if (nnz < 4) + { + numcoeff_vlc = 1; + } + else if (nnz < 8) + { + numcoeff_vlc = 2; + } + else // + { + numcoeff_vlc = 3; + } + + readSyntaxElement_NumCoeffTrailingOnes(&currSE, currStream, numcoeff_vlc); + + numcoeff = currSE.value1; + numtrailingones = 
currSE.value2; + + if(block_type==LUMA_INTRA16x16DC || block_type==LUMA_INTRA16x16AC) + p_Vid->nz_coeff[mb_nr][0][j][i] = (byte) numcoeff; + else if (block_type==CB || block_type==CB_INTRA16x16DC || block_type==CB_INTRA16x16AC) + p_Vid->nz_coeff[mb_nr][1][j][i] = (byte) numcoeff; + else + p_Vid->nz_coeff[mb_nr][2][j][i] = (byte) numcoeff; + + + memzero64(levarr); + memzero64(runarr); + + numones = numtrailingones; + *number_coefficients = numcoeff; + + if (numcoeff) + { + if (numtrailingones) + { + code = readSyntaxElement_FLC(currStream, numtrailingones); + + ntr = numtrailingones; + for (k = numcoeff - 1; k > numcoeff - 1 - numtrailingones; k--) + { + ntr --; + levarr[k] = (code>>ntr)&1 ? -1 : 1; + } + } + + // decode levels + level_two_or_higher = (numcoeff > 3 && numtrailingones == 3)? 0 : 1; + vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 1 : 0; + + for (k = numcoeff - 1 - numtrailingones; k >= 0; k--) + { + int level; + if (vlcnum == 0) + level=readSyntaxElement_Level_VLC0(currStream); + else + level=readSyntaxElement_Level_VLCN(vlcnum, currStream); + + if (level_two_or_higher) + { + level += (level > 0) ? 1 : -1; + level_two_or_higher = 0; + } + + levarr[k] = level; + abslevel = iabs(levarr[k]); + if (abslevel == 1) + ++numones; + + // update VLC table + if (abslevel > incVlc[vlcnum]) + ++vlcnum; + + if (k == numcoeff - 1 - numtrailingones && abslevel >3) + vlcnum = 2; + } + + if (numcoeff < max_coeff_num) + { + // decode total run + vlcnum = numcoeff - 1; + totzeros = readSyntaxElement_TotalZeros(currStream, vlcnum); + } + else + { + totzeros = 0; + } + + // decode run before each coefficient + zerosleft = totzeros; + i = numcoeff - 1; + + if (zerosleft > 0 && i > 0) + { + do + { + // select VLC for runbefore + vlcnum = imin(zerosleft - 1, RUNBEFORE_NUM_M1); + + runarr[i] = readSyntaxElement_Run(currStream, vlcnum); + + zerosleft -= runarr[i]; + i --; + } while (zerosleft != 0 && i != 0); + } + runarr[i] = zerosleft; + } // if numcoeff +} + +/*! 
+************************************************************************ +* \brief +* Get coefficients (run/level) of 4x4 blocks in a SMB +* from the NAL (CABAC Mode) +************************************************************************ +*/ +static void readCompCoeff4x4SMB_I16MB_CABAC(Macroblock *currMB, int context, h264_short_block_t *blocks, int block_y, int block_x, int64 *cbp_blk) +{ + // start_scan == 1 + int i,j,k; + RunLevel rl; + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + + const byte *pos_scan4x4 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_1D : FIELD_SCAN_1D; + const byte *pos_scan_4x4; + // make distinction between INTRA and INTER coded luminance coefficients + int type = (currMB->is_intra_block ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER); + DecodingEnvironment *de_cabac = &currSlice->partArr[partMap[type]].de_cabac; + + for (j = 0; j < BLOCK_SIZE_8x8; j += BLOCK_SIZE) + { + currMB->subblock_y = block_y + j; // position for coeff_count ctx + + for (i = 0; i < BLOCK_SIZE_8x8; i += BLOCK_SIZE) + { + int16_t *block = (int16_t *)(*blocks++); + currMB->subblock_x = block_x + i; // position for coeff_count ctx + pos_scan_4x4 = &pos_scan4x4[1]; + for(k = 0; k < 16; k++) + { + rl = readRunLevel_CABAC(currMB, de_cabac, context); + + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + block[*pos_scan_4x4++] = rl.level; + } + else + break; + } + } + } +} + +#ifdef _M_IX86 +static void readCompCoeff4x4SMB_CABAC(Macroblock *currMB, int context, h264_short_block_t *blocks, int block_y, int block_x, int64_t *cbp_blk64) +#else +static void readCompCoeff4x4SMB_CABAC(Macroblock *currMB, int context, h264_short_block_t *blocks, int block_y, int block_x, int64_t *cbp_blk) +#endif +{ + int k; + RunLevel rl; + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = 
assignSE2partition[currSlice->dp_mode]; + const byte *pos_scan4x4 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_1D : FIELD_SCAN_1D; + const byte *pos_scan_4x4; + int16_t *block; +#ifdef _M_IX86 + int32_t *cbp_blk = (int32_t *)cbp_blk64; +#endif + //h264_short_block_t *blocks = &currSlice->cof4[pl][cof4_pos_to_subblock[block_y>>2][block_x>>2]]; + DecodingEnvironment *de_cabac_dc, *de_cabac_ac; + /* + * make distinction between INTRA and INTER coded + * luminance coefficients + */ + if (currMB->is_intra_block) + { + de_cabac_dc = &currSlice->partArr[partMap[SE_LUM_DC_INTRA]].de_cabac; + de_cabac_ac = &currSlice->partArr[partMap[SE_LUM_AC_INTRA]].de_cabac; + } + else + { + de_cabac_dc = &currSlice->partArr[partMap[SE_LUM_DC_INTER]].de_cabac; + de_cabac_ac = &currSlice->partArr[partMap[SE_LUM_AC_INTER]].de_cabac; + } +// for (j = block_y; j < (block_y+BLOCK_SIZE_8x8); j += 4) + + + block = (int16_t *)(*blocks++); + currMB->subblock_y = block_y; // position for coeff_count ctx + currMB->subblock_x = block_x; // position for coeff_count ctx + pos_scan_4x4 = pos_scan4x4; + rl = readRunLevel_CABAC(currMB, de_cabac_dc, context); + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + *cbp_blk |= 1 << (block_y + (block_x >> 2)) ; + block[*pos_scan_4x4++] = rl.level; + for(k = 0; k < 16; ++k) + { + rl = readRunLevel_CABAC(currMB, de_cabac_ac, context); + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + block[*pos_scan_4x4++] = rl.level; + } + else + break; + } + } + + block = (int16_t *)(*blocks++); + currMB->subblock_x += 4; // position for coeff_count ctx + pos_scan_4x4 = pos_scan4x4; + rl = readRunLevel_CABAC(currMB, de_cabac_dc, context); + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + *cbp_blk |= 2 << (block_y + (block_x >> 2)) ; + block[*pos_scan_4x4++] = rl.level; + for(k = 0; k < 16; ++k) + { + rl = readRunLevel_CABAC(currMB, de_cabac_ac, context); + if (rl.level != 
0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + block[*pos_scan_4x4++] = rl.level; + } + else + break; + } + } + /* ---- */ + block = (int16_t *)(*blocks++); + currMB->subblock_y += 4; // position for coeff_count ctx + currMB->subblock_x = block_x; // position for coeff_count ctx + pos_scan_4x4 = pos_scan4x4; + rl = readRunLevel_CABAC(currMB, de_cabac_dc, context); + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + *cbp_blk |= 16 << (block_y + (block_x >> 2)) ; + block[*pos_scan_4x4++] = rl.level; + for(k = 0; k < 16; ++k) + { + rl = readRunLevel_CABAC(currMB, de_cabac_ac, context); + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + block[*pos_scan_4x4++] = rl.level; + } + else + break; + } + } + + block = (int16_t *)(*blocks++); + currMB->subblock_x += 4; // position for coeff_count ctx + pos_scan_4x4 = pos_scan4x4; + rl = readRunLevel_CABAC(currMB, de_cabac_dc, context); + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + *cbp_blk |= 32 << (block_y + (block_x >> 2)) ; + block[*pos_scan_4x4++] = rl.level; + for(k = 0; k < 16; ++k) + { + rl = readRunLevel_CABAC(currMB, de_cabac_ac, context); + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + block[*pos_scan_4x4++] = rl.level; + } + else + break; + } + } + +} + +#if defined(_DEBUG) || defined(_M_IX64) +static void inv_level_coefficients(h264_short_block_t *blocks, const int (*InvLevelScale)[4], int qp_per) +{ + int j, b; + + for (b = 0;b<4;b++) + { + h264_short_block_row_t *block = blocks[b]; + for (j = 0; j < 4; ++j) + { + if (block[j][0]) block[j][0]= rshift_rnd_sf((block[j][0] * InvLevelScale[j][0]) << qp_per, 4); + if (block[j][1]) block[j][1]= rshift_rnd_sf((block[j][1] * InvLevelScale[j][1]) << qp_per, 4); + if (block[j][2]) block[j][2]= rshift_rnd_sf((block[j][2] * InvLevelScale[j][2]) << qp_per, 4); + if (block[j][3]) block[j][3]= rshift_rnd_sf((block[j][3] * InvLevelScale[j][3]) << 
qp_per, 4); + } + } +} +#else +void inv_level_coefficients(h264_short_block_t *blocks, const int (*InvLevelScale)[4], int qp_per); +#endif + +static void inv_level_coefficients_AC(h264_short_block_t *blocks, const int (*InvLevelScale)[4], int qp_per) +{ + int b; + + for (b = 0;b<4;b++) + { + h264_short_block_row_t *block = blocks[b]; + if (block[0][1]) block[0][1]= rshift_rnd_sf((block[0][1] * InvLevelScale[0][1]) << qp_per, 4); + if (block[0][2]) block[0][2]= rshift_rnd_sf((block[0][2] * InvLevelScale[0][2]) << qp_per, 4); + if (block[0][3]) block[0][3]= rshift_rnd_sf((block[0][3] * InvLevelScale[0][3]) << qp_per, 4); + + if (block[1][0]) block[1][0]= rshift_rnd_sf((block[1][0] * InvLevelScale[1][0]) << qp_per, 4); + if (block[1][1]) block[1][1]= rshift_rnd_sf((block[1][1] * InvLevelScale[1][1]) << qp_per, 4); + if (block[1][2]) block[1][2]= rshift_rnd_sf((block[1][2] * InvLevelScale[1][2]) << qp_per, 4); + if (block[1][3]) block[1][3]= rshift_rnd_sf((block[1][3] * InvLevelScale[1][3]) << qp_per, 4); + + if (block[2][0]) block[2][0]= rshift_rnd_sf((block[2][0] * InvLevelScale[2][0]) << qp_per, 4); + if (block[2][1]) block[2][1]= rshift_rnd_sf((block[2][1] * InvLevelScale[2][1]) << qp_per, 4); + if (block[2][2]) block[2][2]= rshift_rnd_sf((block[2][2] * InvLevelScale[2][2]) << qp_per, 4); + if (block[2][3]) block[2][3]= rshift_rnd_sf((block[2][3] * InvLevelScale[2][3]) << qp_per, 4); + + if (block[3][0]) block[3][0]= rshift_rnd_sf((block[3][0] * InvLevelScale[3][0]) << qp_per, 4); + if (block[3][1]) block[3][1]= rshift_rnd_sf((block[3][1] * InvLevelScale[3][1]) << qp_per, 4); + if (block[3][2]) block[3][2]= rshift_rnd_sf((block[3][2] * InvLevelScale[3][2]) << qp_per, 4); + if (block[3][3]) block[3][3]= rshift_rnd_sf((block[3][3] * InvLevelScale[3][3]) << qp_per, 4); + } +} + +/*! 
+************************************************************************ +* \brief +* Get coefficients (run/level) of all 4x4 blocks in a MB +* from the NAL (CABAC Mode) +************************************************************************ +*/ +static void readCompCoeff4x4MB_CABAC(Macroblock *currMB, ColorPlane pl, int intra, int (*InvLevelScale4x4)[4], int qp_per, int cbp) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int start_scan = IS_I16MB (currMB)? 1 : 0; + int64 *cbp_blk = &currMB->cbp_blk[pl]; + int context; + h264_short_block_t *blocks = currSlice->cof4[pl]; + + currMB->is_intra_block = intra; + + if( pl == PLANE_Y || IS_INDEPENDENT(p_Vid) ) + context = (IS_I16MB(currMB) ? LUMA_16AC: LUMA_4x4); + else if (pl == PLANE_U) + context = (IS_I16MB(currMB) ? CB_16AC: CB_4x4); + else + context = (IS_I16MB(currMB) ? CR_16AC: CR_4x4); + if (start_scan == 0) + { + if (currMB->is_lossless == FALSE) + { + if (cbp & 1) + { + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[0], 0, 0, cbp_blk); + inv_level_coefficients(&blocks[0], InvLevelScale4x4, qp_per); + } + if (cbp & 2) + { + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[4], 0, 8, cbp_blk); + inv_level_coefficients(&blocks[4], InvLevelScale4x4, qp_per); + } + if (cbp & 4) + { + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[8], 8, 0, cbp_blk); + inv_level_coefficients(&blocks[8], InvLevelScale4x4, qp_per); + } + if (cbp & 8) + { + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[12], 8, 8, cbp_blk); + inv_level_coefficients(&blocks[12], InvLevelScale4x4, qp_per); + } + } + else + { + if (cbp & 1) + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[0], 0, 0, cbp_blk); + if (cbp & 2) + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[4], 0, 8, cbp_blk); + if (cbp & 4) + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[8], 8, 0, cbp_blk); + if (cbp & 8) + readCompCoeff4x4SMB_CABAC(currMB, context, &blocks[12], 8, 8, cbp_blk); + } + } + else + { + if 
(currMB->is_lossless == FALSE) + { + if (cbp & 1) // are there any coeff in current block at all + { + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[0], 0, 0, cbp_blk); + inv_level_coefficients_AC(&blocks[0], InvLevelScale4x4, qp_per); + } + if (cbp & 2) // are there any coeff in current block at all + { + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[4], 0, 8, cbp_blk); + inv_level_coefficients_AC(&blocks[4], InvLevelScale4x4, qp_per); + } + if (cbp & 4) // are there any coeff in current block at all + { + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[8], 8, 0, cbp_blk); + inv_level_coefficients_AC(&blocks[8], InvLevelScale4x4, qp_per); + } + if (cbp & 8) // are there any coeff in current block at all + { + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[12], 8, 8, cbp_blk); + inv_level_coefficients_AC(&blocks[12], InvLevelScale4x4, qp_per); + } + } + else + { + if (cbp & 1) + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[0], 0, 0, cbp_blk); + if (cbp & 2) + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[4], 0, 8, cbp_blk); + if (cbp & 4) + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[8], 8, 0, cbp_blk); + if (cbp & 8) + readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, &blocks[12], 8, 8, cbp_blk); + } + } +} + +/*! +************************************************************************ +* \brief +* Get coefficients (run/level) of one 8x8 block +* from the NAL (CABAC Mode) +************************************************************************ +*/ +static void readCompCoeff8x8_CABAC_Lossless(Macroblock *currMB, ColorPlane pl, int b8) +{ + if (currMB->cbp & (1<<b8)) // are there any coefficients in the current block + { + VideoParameters *p_Vid = currMB->p_Vid; + int transform_pl = IS_INDEPENDENT(p_Vid) ? 
p_Vid->colour_plane_id : pl; + int scan; + short *tcoeffs; + int k; + RunLevel rl; + int context; + DataPartition *dP; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + + int cbp_mask = (int64) 51 << (4 * b8 - 2 * (b8 & 0x01)); // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position + int64 *cur_cbp = &currMB->cbp_blk[pl]; + + // select scan type + const byte *pos_scan8x8 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN8x8_1D : FIELD_SCAN8x8_1D; + + int qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[pl] ]; + int qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[pl] ]; + + const int *InvLevelScale8x8 = IS_INTRA(currMB)? currSlice->InvLevelScale8x8_Intra[transform_pl][qp_rem] : currSlice->InvLevelScale8x8_Inter[transform_pl][qp_rem]; + + currMB->is_intra_block = IS_INTRA(currMB); + + // === set offset in current macroblock === + tcoeffs = (short *)(currSlice->mb_rres8[pl][b8]); + + currMB->subblock_x = (b8&0x01) << 3; // position for coeff_count ctx + currMB->subblock_y = (b8 >> 1) << 3; // position for coeff_count ctx + + if (pl==PLANE_Y || IS_INDEPENDENT(p_Vid)) + context = LUMA_8x8; + else if (pl==PLANE_U) + context = CB_8x8; + else + context = CR_8x8; + + for(k=0; (k < 65);++k) + { + //============ read ============= + /* + * make distinction between INTRA and INTER coded + * luminance coefficients + */ + + int type = ((currMB->is_intra_block == 1) + ? (k==0 ? SE_LUM_DC_INTRA : SE_LUM_AC_INTRA) + : (k==0 ? 
SE_LUM_DC_INTER : SE_LUM_AC_INTER)); + + dP = &(currSlice->partArr[partMap[type]]); + rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), context); + + //============ decode ============= + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan8x8 += rl.run; + + scan = *pos_scan8x8++; + + *cur_cbp |= cbp_mask; + + tcoeffs[scan] = rl.level; + } + else + break; + } + } +} + + +static void readCompCoeff8x8_CABAC_Intra(Macroblock *currMB, ColorPlane pl, int b8) +{ + if (currMB->cbp & (1<<b8)) // are there any coefficients in the current block + { + VideoParameters *p_Vid = currMB->p_Vid; + int transform_pl = IS_INDEPENDENT(p_Vid) ? p_Vid->colour_plane_id : pl; + int scan; + short *tcoeffs; + RunLevel rl; + int k; + int context; + DecodingEnvironment *cabac; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + + int cbp_mask = (int64) 51 << (4 * b8 - 2 * (b8 & 0x01)); // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position + int64 *cur_cbp = &currMB->cbp_blk[pl]; + + // select scan type + const byte *pos_scan8x8 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? 
SNGL_SCAN8x8_1D : FIELD_SCAN8x8_1D; + + int qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[pl] ]; + int qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[pl] ]; + + const int *InvLevelScale8x8 = currSlice->InvLevelScale8x8_Intra[transform_pl][qp_rem]; + + currMB->is_intra_block = 1; + + // === set offset in current macroblock === + tcoeffs = (short *)(currSlice->mb_rres8[pl][b8]); + + currMB->subblock_x = (b8&0x01) << 3; // position for coeff_count ctx + currMB->subblock_y = (b8 >> 1) << 3; // position for coeff_count ctx + + if (pl==PLANE_Y || IS_INDEPENDENT(p_Vid)) + context = LUMA_8x8; + else if (pl==PLANE_U) + context = CB_8x8; + else + context = CR_8x8; + + // Read DC + cabac = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]].de_cabac); + rl = readRunLevel_CABAC(currMB, cabac, context); + + //============ decode ============= + if (rl.level != 0) /* leave if level == 0 */ + { + *cur_cbp |= cbp_mask; + + pos_scan8x8 += rl.run; + + scan = *pos_scan8x8++; + + tcoeffs[scan] = rshift_rnd_sf((rl.level * InvLevelScale8x8[scan]) << qp_per, 6); // dequantization + + // AC coefficients + cabac = &(currSlice->partArr[partMap[SE_LUM_AC_INTRA]].de_cabac); + + k = 64; + do + { + rl = readRunLevel_CABAC(currMB, cabac, context); + + //============ decode ============= + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan8x8 += rl.run; + + scan = *pos_scan8x8++; + + tcoeffs[scan] = rshift_rnd_sf((rl.level * InvLevelScale8x8[scan]) << qp_per, 6); // dequantization + } + else + break; + } while (--k); + } + } + +} + + + +static void readCompCoeff8x8_CABAC_Inter(Macroblock *currMB, ColorPlane pl, int b8) +{ + if (currMB->cbp & (1<<b8)) // are there any coefficients in the current block + { + VideoParameters *p_Vid = currMB->p_Vid; + int transform_pl = IS_INDEPENDENT(p_Vid) ? 
p_Vid->colour_plane_id : pl; + int scan; + short *tcoeffs; + int k; + RunLevel rl; + int context; + DecodingEnvironment *cabac; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + + int cbp_mask = (int64) 51 << (4 * b8 - 2 * (b8 & 0x01)); // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position + int64 *cur_cbp = &currMB->cbp_blk[pl]; + + // select scan type + const byte *pos_scan8x8 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN8x8_1D : FIELD_SCAN8x8_1D; + + int qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[pl] ]; + int qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[pl] ]; + + const int *InvLevelScale8x8 = currSlice->InvLevelScale8x8_Inter[transform_pl][qp_rem]; + + currMB->is_intra_block = 0; + + // === set offset in current macroblock === + tcoeffs = (short *)(currSlice->mb_rres8[pl][b8]); + + currMB->subblock_x = (b8&0x01) << 3; // position for coeff_count ctx + currMB->subblock_y = (b8 >> 1) << 3; // position for coeff_count ctx + + if (pl==PLANE_Y || IS_INDEPENDENT(p_Vid)) + context = LUMA_8x8; + else if (pl==PLANE_U) + context = CB_8x8; + else + context = CR_8x8; + + // Read DC + cabac = &(currSlice->partArr[partMap[SE_LUM_DC_INTER]].de_cabac); + rl = readRunLevel_CABAC(currMB, cabac, context); + + //============ decode ============= + if (rl.level != 0) /* leave if level == 0 */ + { + *cur_cbp |= cbp_mask; + + pos_scan8x8 += rl.run; + + scan = *pos_scan8x8++; + + tcoeffs[scan] = rshift_rnd_sf((rl.level * InvLevelScale8x8[scan]) << qp_per, 6); // dequantization + + // AC coefficients + cabac = &(currSlice->partArr[partMap[SE_LUM_AC_INTER]].de_cabac); + + k=64; + do + { + rl = readRunLevel_CABAC(currMB, cabac, context); + + //============ decode ============= + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan8x8 += rl.run; + + scan = *pos_scan8x8++; + + tcoeffs[scan] = rshift_rnd_sf((rl.level * InvLevelScale8x8[scan]) << qp_per, 6); // 
dequantization + } + else + break; + } while (--k); + } + } + +} + + +/*! +************************************************************************ +* \brief +* Get coefficients (run/level) of 8x8 blocks in a MB +* from the NAL (CABAC Mode) +************************************************************************ +*/ +static void readCompCoeff8x8MB_CABAC(Macroblock *currMB, ColorPlane pl) +{ + //======= 8x8 transform size & CABAC ======== + if(currMB->is_lossless == FALSE) + { + if (IS_INTRA(currMB)) + { + readCompCoeff8x8_CABAC_Intra(currMB, pl, 0); + readCompCoeff8x8_CABAC_Intra(currMB, pl, 1); + readCompCoeff8x8_CABAC_Intra(currMB, pl, 2); + readCompCoeff8x8_CABAC_Intra(currMB, pl, 3); + } + else + { + readCompCoeff8x8_CABAC_Inter(currMB, pl, 0); + readCompCoeff8x8_CABAC_Inter(currMB, pl, 1); + readCompCoeff8x8_CABAC_Inter(currMB, pl, 2); + readCompCoeff8x8_CABAC_Inter(currMB, pl, 3); + } + } + else + { + readCompCoeff8x8_CABAC_Lossless(currMB, pl, 0); + readCompCoeff8x8_CABAC_Lossless(currMB, pl, 1); + readCompCoeff8x8_CABAC_Lossless(currMB, pl, 2); + readCompCoeff8x8_CABAC_Lossless(currMB, pl, 3); + } +} + +/*! +************************************************************************ +* \brief +* Get coefficients (run/level) of 4x4 blocks in a MB +* from the NAL (CABAC Mode) +************************************************************************ +*/ +static void readCompCoeff4x4MB_CAVLC (Macroblock *currMB, ColorPlane pl, int (*InvLevelScale4x4)[4], int qp_per, int cbp, h264_4x4_byte nzcoeff) +{ + int block_y, block_x, b8; + int i, j, k; + int i0, j0; + __declspec(align(32)) int levarr[16], runarr[16]; + int numcoeff; + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + const byte (*pos_scan4x4)[2] = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN : FIELD_SCAN; + const byte *pos_scan_4x4 = pos_scan4x4[0]; + int start_scan = IS_I16MB(currMB) ? 
1 : 0; + int64 *cur_cbp = &currMB->cbp_blk[pl]; + int coef_ctr, cur_context; + + memzero64(levarr); + memzero64(runarr); + + if (IS_I16MB(currMB)) + { + if (pl == PLANE_Y) + cur_context = LUMA_INTRA16x16AC; + else if (pl == PLANE_U) + cur_context = CB_INTRA16x16AC; + else + cur_context = CR_INTRA16x16AC; + } + else + { + if (pl == PLANE_Y) + cur_context = LUMA; + else if (pl == PLANE_U) + cur_context = CB; + else + cur_context = CR; + } + + if (currMB->is_lossless == FALSE) + { + for (block_y = 0; block_y < 4; block_y += 2) /* all modes */ + { + for (block_x = 0; block_x < 4; block_x += 2) + { + b8 = (block_y + (block_x >> 1)); + + if (cbp & (1 << b8)) // test if the block contains any coefficients + { + for (j=block_y << 2; j < (block_y + 2) << 2; j += BLOCK_SIZE) + { + for (i=block_x << 2; i < (block_x + 2) << 2; i += BLOCK_SIZE) + { + readCoeff4x4_CAVLC(currMB, cur_context, i >> 2, j >> 2, levarr, runarr, &numcoeff); + pos_scan_4x4 = pos_scan4x4[start_scan]; + + for (k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) + { + pos_scan_4x4 += (runarr[k] << 1); + + i0 = *pos_scan_4x4++; + j0 = *pos_scan_4x4++; + + // inverse quant for 4x4 transform only + *cur_cbp |= (int64) 1 << (j + (i >> 2)); + + currSlice->cof4[pl][cof4_pos_to_subblock[j>>2][i>>2]][j0][i0]= rshift_rnd_sf((levarr[k] * InvLevelScale4x4[j0][i0])<<qp_per, 4); + } + } + } + } + } + else + { + for (j=0; j < 2; j++) + { + for (i=0;i<2;i++) + { + nzcoeff[block_y+j][block_x+i]=0; + } + } + } + } + } + } + else + { + for (block_y=0; block_y < 4; block_y += 2) /* all modes */ + { + for (block_x=0; block_x < 4; block_x += 2) + { + b8 = 2*(block_y>>1) + (block_x>>1); + + if (cbp & (1<<b8)) /* are there any coeff in current block at all */ + { + for (j=block_y; j < block_y+2; ++j) + { + for (i=block_x; i < block_x+2; ++i) + { + readCoeff4x4_CAVLC(currMB, cur_context, i, j, levarr, runarr, &numcoeff); + + coef_ctr = start_scan - 1; + + for (k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) + { + coef_ctr += 
runarr[k]+1; + + i0=pos_scan4x4[coef_ctr][0]; + j0=pos_scan4x4[coef_ctr][1]; + + *cur_cbp |= (int64) 1 << ((j<<2) + i); + currSlice->cof4[pl][cof4_pos_to_subblock[j>>2][i>>2]][j0][i0]= levarr[k]; + } + } + } + } + } + else + { + for (j=0; j < 2; j++) + { + for (i=0;i<2;i++) + { + nzcoeff[block_y+j][block_x+i]=0; + } + } + } + } + } + } +} + + +/*! +************************************************************************ +* \brief +* Get coefficients (run/level) of 4x4 blocks in a MB +* from the NAL (CABAC Mode) +************************************************************************ +*/ +static void readCompCoeff8x8MB_CAVLC (Macroblock *currMB, ColorPlane pl, const int *InvLevelScale8x8, int qp_per, int cbp, h264_4x4_byte nzcoeff) +{ + int block_y, block_x, b4, b8; + int i,j,k; + int scan; + __declspec(align(32)) int levarr[16] = {0}, runarr[16] = {0}; + int numcoeff; + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + const byte *pos_scan8x8 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN8x8_1D : FIELD_SCAN8x8_1D; + int start_scan = IS_I16MB(currMB) ? 
1 : 0; + int64 *cur_cbp = &currMB->cbp_blk[pl]; + int coef_ctr, cur_context; + short *coefficients; + + if (IS_I16MB(currMB)) + { + if (pl == PLANE_Y) + cur_context = LUMA_INTRA16x16AC; + else if (pl == PLANE_U) + cur_context = CB_INTRA16x16AC; + else + cur_context = CR_INTRA16x16AC; + } + else + { + if (pl == PLANE_Y) + cur_context = LUMA; + else if (pl == PLANE_U) + cur_context = CB; + else + cur_context = CR; + } + + if (currMB->is_lossless == FALSE) + { + for (block_y=0; block_y < 4; block_y += 2) /* all modes */ + { + for (block_x=0; block_x < 4; block_x += 2) + { + b8 = block_y + (block_x>>1); + coefficients =(short *)(currSlice->mb_rres8[pl][b8]); + if (cbp & (1<<b8)) /* are there any coeff in current block at all */ + { + for (j=block_y; j < block_y+2; ++j) + { + for (i=block_x; i < block_x+2; ++i) + { + readCoeff4x4_CAVLC(currMB, cur_context, i, j, levarr, runarr, &numcoeff); + + coef_ctr = start_scan - 1; + + for (k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) + { + coef_ctr += runarr[k]+1; + + // do same as CABAC for deblocking: any coeff in the 8x8 marks all the 4x4s + //as containing coefficients + *cur_cbp |= 51 << ((block_y<<2) + block_x); + + b4 = (coef_ctr << 2) + 2*(j - block_y)+(i - block_x); + + scan = pos_scan8x8[b4]; + + coefficients[scan] = rshift_rnd_sf((levarr[k] * InvLevelScale8x8[scan])<<qp_per, 6); // dequantization + } + }//else (!currMB->luma_transform_size_8x8_flag) + } + } + } + else + { + for (j=block_y; j < block_y+2; ++j) + { + memset(&nzcoeff[j][block_x], 0, 2 * sizeof(byte)); + } + } + } + } + } + else // inverse quant for 8x8 transform + { + for (block_y=0; block_y < 4; block_y += 2) /* all modes */ + { + for (block_x=0; block_x < 4; block_x += 2) + { + b8 = 2*(block_y>>1) + (block_x>>1); + coefficients =(short *)(currSlice->mb_rres8[pl][b8]); + if (cbp & (1<<b8)) /* are there any coeff in current block at all */ + { + for (j=block_y; j < block_y+2; ++j) + { + for (i=block_x; i < block_x+2; ++i) + { + + 
readCoeff4x4_CAVLC(currMB, cur_context, i, j, levarr, runarr, &numcoeff); + + coef_ctr = start_scan - 1; + + for (k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) + { + coef_ctr += runarr[k]+1; + + // do same as CABAC for deblocking: any coeff in the 8x8 marks all the 4x4s + //as containing coefficients + *cur_cbp |= 51 << ((block_y<<2) + block_x); + + b4 = 2*(j-block_y)+(i-block_x); + + scan=pos_scan8x8[coef_ctr*4+b4]; + + coefficients[scan] = levarr[k]; + } + } + } + } + } + else + { + for (j=block_y; j < block_y+2; ++j) + { + memset(&nzcoeff[j][block_x], 0, 2 * sizeof(byte)); + } + } + } + } + } +} + +/*! +************************************************************************ +* \brief +* Data partitioning: Check if neighboring macroblock is needed for +* CAVLC context decoding, and disable current MB if data partition +* is missing. +************************************************************************ +*/ +static void check_dp_neighbors (Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + if (IS_INTER (currMB) || (IS_INTRA (currMB) && !(p_Vid->active_pps->constrained_intra_pred_flag)) ) + { + PixelPos up, left; + + p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[1], &left); + p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[1], &up); + + if (left.available) + { + currMB->dpl_flag |= p_Vid->mb_data[left.mb_addr].dpl_flag; + } + if (up.available) + { + currMB->dpl_flag |= p_Vid->mb_data[up.mb_addr].dpl_flag; + } + } +} + + +/*! 
+************************************************************************ +* \brief +* Get coded block pattern and coefficients (run/level) +* from the NAL +************************************************************************ +*/ +static void read_CBP_and_coeffs_from_NAL_CABAC(Macroblock *currMB) +{ + int i,j,k; + int cbp; + SyntaxElement currSE; + DataPartition *dP = NULL; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + int coef_ctr, i0, j0, b8; + int ll; + RunLevel rl; + + int qp_per, qp_rem; + VideoParameters *p_Vid = currMB->p_Vid; + int intra = IS_INTRA (currMB); + int smb = ((p_Vid->type==SP_SLICE) && !intra) || (p_Vid->type == SI_SLICE && currMB->mb_type == SI4MB); + + int uv; + int qp_per_uv[2]; + int qp_rem_uv[2]; + + + int temp[4]; + + int b4; + StorablePicture *dec_picture = p_Vid->dec_picture; + int yuv = dec_picture->chroma_format_idc - 1; + int m6[4]; + + int need_transform_size_flag; + + int (*InvLevelScale4x4)[4] = NULL; + + // select scan type + const byte (*pos_scan4x4)[2] = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN : FIELD_SCAN; + const byte *pos_scan4x4_1d = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_1D : FIELD_SCAN_1D; + const byte *pos_scan4x4_dc = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_DC : FIELD_SCAN_DC; + const byte *pos_scan_4x4; + + // QPI + //init constants for every chroma qp offset + if (dec_picture->chroma_format_idc != YUV400) + { + for (i=0; i<2; ++i) + { + qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ]; + qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ]; + } + } + + // read CBP if not new intra mode + if (!IS_I16MB (currMB)) + { + //===== C B P ===== + //--------------------- + int type = (currMB->mb_type == I4MB || currMB->mb_type == SI4MB || currMB->mb_type == I8MB) + ? 
SE_CBP_INTRA + : SE_CBP_INTER; + + dP = &(currSlice->partArr[partMap[type]]); + + currMB->cbp = cbp = readCBP_CABAC(currMB, &(dP->de_cabac)); + + TRACE_STRING("coded_block_pattern"); + + + //============= Transform size flag for INTER MBs ============= + //------------------------------------------------------------- + need_transform_size_flag = (((currMB->mb_type >= 1 && currMB->mb_type <= 3)|| + (IS_DIRECT(currMB) && p_Vid->active_sps->direct_8x8_inference_flag) || + (currMB->NoMbPartLessThan8x8Flag)) + && currMB->mb_type != I8MB && currMB->mb_type != I4MB + && (currMB->cbp&15) + && p_Vid->Transform8x8Mode); + + if (need_transform_size_flag) + { + dP = &(currSlice->partArr[partMap[SE_HEADER]]); + TRACE_STRING("transform_size_8x8_flag"); + + // read CAVLC transform_size_8x8_flag + currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &(dP->de_cabac)); + } + + //===== DQUANT ===== + //---------------------- + // Delta quant only if nonzero coeffs + if (cbp !=0) + { + read_delta_quant_CABAC(&currSE, dP, currMB, partMap, (!intra) ? 
SE_DELTA_QUANT_INTER : SE_DELTA_QUANT_INTRA); + + if (currSlice->dp_mode) + { + if (!intra && currSlice->dpC_NotPresent ) + currMB->dpl_flag = 1; + + if( intra && currSlice->dpB_NotPresent ) + { + currMB->ei_flag = 1; + currMB->dpl_flag = 1; + } + + // check for prediction from neighbours + check_dp_neighbors (currMB); + if (currMB->dpl_flag) + { + cbp = 0; + currMB->cbp = cbp; + } + } + } + } + else + { + cbp = currMB->cbp; + } + + if (IS_I16MB (currMB)) // read DC coeffs for new intra modes + { + read_delta_quant_CABAC(&currSE, dP, currMB, partMap, SE_DELTA_QUANT_INTRA); + + macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y); + + if (currSlice->dp_mode) + { + if (currSlice->dpB_NotPresent) + { + currMB->ei_flag = 1; + currMB->dpl_flag = 1; + } + check_dp_neighbors (currMB); + if (currMB->dpl_flag) + { + currMB->cbp = cbp = 0; + } + } + + if (!currMB->dpl_flag) + { + pos_scan_4x4 = pos_scan4x4_dc; + + { + dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]); + + currMB->is_intra_block = 1; + + for(k = 0; k < 17 ; k++) + { + rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), LUMA_16DC); + + if (rl.level != 0) /* leave if level == 0 */ + { + pos_scan_4x4 += rl.run; + currSlice->cof4[0][*pos_scan_4x4++][0][0] = rl.level;// add new intra DC coeff + } + else + break; + } + + } + + if(currMB->is_lossless == FALSE) + itrans_2(currMB, (ColorPlane) p_Vid->colour_plane_id);// transform new intra DC + } + } + + update_qp(currMB, p_Vid->qp); + + qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ]; + qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ]; + + //init quant parameters for chroma + if (dec_picture->chroma_format_idc != YUV400) + { + for(i=0; i < 2; ++i) + { + qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ]; + qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ]; + } + } + + InvLevelScale4x4 = intra? 
currSlice->InvLevelScale4x4_Intra[p_Vid->colour_plane_id][qp_rem] : currSlice->InvLevelScale4x4_Inter[p_Vid->colour_plane_id][qp_rem]; + + // luma coefficients + { + //======= Other Modes & CABAC ======== + //------------------------------------ + if (cbp) + { + if(currMB->luma_transform_size_8x8_flag) + { + //======= 8x8 transform size & CABAC ======== + readCompCoeff8x8MB_CABAC (currMB, PLANE_Y); + } + else + { + readCompCoeff4x4MB_CABAC (currMB, PLANE_Y, intra, InvLevelScale4x4, qp_per, cbp); + } + } + } + + if ( p_Vid->active_sps->chroma_format_idc==YUV444 && !IS_INDEPENDENT(p_Vid) ) + { + for (uv = 0; uv < 2; ++uv ) + { + /*----------------------16x16DC Luma_Add----------------------*/ + if (IS_I16MB (currMB)) // read DC coeffs for new intra modes + { + macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y); + + { + int context; + dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]); + + if( IS_INDEPENDENT(p_Vid) ) + context = LUMA_16DC; + else + context = (uv==0) ? CB_16DC : CR_16DC; + + currMB->is_intra_block = 1; + + coef_ctr = -1; + + for(k=0;k<17;++k) + { + rl = readRunLevel_CABAC(currMB, &dP->de_cabac, context); + + if (rl.level != 0) // leave if level == 0 + { + coef_ctr += rl.run + 1; + currSlice->cof4[uv + 1][pos_scan4x4_1d[coef_ctr]][0][0] = rl.level; + } + else + break; + } //k loop + } // else CAVLC + + if(currMB->is_lossless == FALSE) + { + itrans_2(currMB, (ColorPlane) (uv + 1)); // transform new intra DC + } + } //IS_I16MB + + update_qp(currMB, p_Vid->qp); + + qp_per = p_Vid->qp_per_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ]; + qp_rem = p_Vid->qp_rem_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ]; + + //init constants for every chroma qp offset + qp_per_uv[uv] = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ]; + qp_rem_uv[uv] = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ]; + + InvLevelScale4x4 = intra? 
currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]]; + + { + if (cbp) + { + if(currMB->luma_transform_size_8x8_flag) + { + //======= 8x8 transform size & CABAC ======== + readCompCoeff8x8MB_CABAC(currMB, (ColorPlane) (PLANE_U + uv)); + } + else //4x4 + { + readCompCoeff4x4MB_CABAC(currMB, (ColorPlane) (PLANE_U + uv), intra, InvLevelScale4x4, qp_per_uv[uv], cbp); + } + } + } + } + } //444 + else if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444)) + { + //========================== CHROMA DC ============================ + //----------------------------------------------------------------- + // chroma DC coeff + if(cbp>15) + { + if (dec_picture->chroma_format_idc == YUV420) + { + for (ll=0;ll<3;ll+=2) + { + uv = ll>>1; + + InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]]; + //===================== CHROMA DC YUV420 ====================== + memzero16(&currSlice->cofu[0]); + coef_ctr=-1; + + { + int type = (intra ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER); + + currMB->is_intra_block = intra; + currMB->is_v_block = ll; + + dP = &(currSlice->partArr[partMap[type]]); + + for(k = 0; k < (p_Vid->num_cdc_coeff + 1);++k) + { + rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), CHROMA_DC); + + if (rl.level != 0) + { + currMB->cbp_blk[0] |= 0xf0000 << (ll<<1) ; + coef_ctr += rl.run + 1; + + // Bug: currSlice->cofu has only 4 entries, hence coef_ctr MUST be <4 (which is + // caught by the assert(). If it is bigger than 4, it starts patching the + // p_Vid->predmode pointer, which leads to bugs later on. + // + // This assert() should be left in the code, because it captures a very likely + // bug early when testing in error prone environments (or when testing NAL + // functionality). 
+ assert (coef_ctr < p_Vid->num_cdc_coeff); + currSlice->cofu[coef_ctr&3]=rl.level; + } + else + break; + } + } + + if (smb || (currMB->is_lossless == TRUE)) // check to see if MB type is SPred or SIntra4x4 + { + currSlice->cof4[uv + 1][0][0][0] = currSlice->cofu[0]; + currSlice->cof4[uv + 1][1][0][0] = currSlice->cofu[1]; + currSlice->cof4[uv + 1][2][0][0] = currSlice->cofu[2]; + currSlice->cof4[uv + 1][3][0][0] = currSlice->cofu[3]; + } + else + { + ihadamard2x2(currSlice->cofu, temp); + + currSlice->cof4[uv + 1][0][0][0] = (((temp[0] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + currSlice->cof4[uv + 1][1][0][0] = (((temp[1] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + currSlice->cof4[uv + 1][2][0][0] = (((temp[2] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + currSlice->cof4[uv + 1][3][0][0] = (((temp[3] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + } + } + } + else if (dec_picture->chroma_format_idc == YUV422) + { + for (ll=0;ll<3;ll+=2) + { + int (*InvLevelScale4x4)[4] = NULL; + uv = ll>>1; + { + h264_short_block_t *imgcof = currSlice->cof4[uv + 1]; + int m3[2][4] = {{0,0,0,0},{0,0,0,0}}; + int m4[2][4] = {{0,0,0,0},{0,0,0,0}}; + int qp_per_uv_dc = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only + int qp_rem_uv_dc = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only + if (intra) + InvLevelScale4x4 = currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv_dc]; + else + InvLevelScale4x4 = currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv_dc]; + + + //===================== CHROMA DC YUV422 ====================== + { + coef_ctr=-1; + for(k=0;k<9;++k) + { + int type = (intra ? 
SE_CHR_DC_INTRA : SE_CHR_DC_INTER); + currMB->is_intra_block = intra; + currMB->is_v_block = ll; + + dP = &(currSlice->partArr[partMap[type]]); + + rl = readRunLevel_CABAC(currMB, &dP->de_cabac, CHROMA_DC_2x4); + + if (rl.level != 0) + { + currMB->cbp_blk[0] |= ((int64)0xff0000) << (ll<<2) ; + coef_ctr += rl.run + 1; + assert (coef_ctr < p_Vid->num_cdc_coeff); + i0=SCAN_YUV422[coef_ctr][0]; + j0=SCAN_YUV422[coef_ctr][1]; + + m3[i0][j0]=rl.level; + } + else + break; + } + } + // inverse CHROMA DC YUV422 transform + // horizontal + if(currMB->is_lossless == FALSE) + { + m4[0][0] = m3[0][0] + m3[1][0]; + m4[0][1] = m3[0][1] + m3[1][1]; + m4[0][2] = m3[0][2] + m3[1][2]; + m4[0][3] = m3[0][3] + m3[1][3]; + + m4[1][0] = m3[0][0] - m3[1][0]; + m4[1][1] = m3[0][1] - m3[1][1]; + m4[1][2] = m3[0][2] - m3[1][2]; + m4[1][3] = m3[0][3] - m3[1][3]; + + for (i = 0; i < 2; ++i) + { + m6[0] = m4[i][0] + m4[i][2]; + m6[1] = m4[i][0] - m4[i][2]; + m6[2] = m4[i][1] - m4[i][3]; + m6[3] = m4[i][1] + m4[i][3]; + + imgcof[cof4_pos_to_subblock[0][i]][0][0] = m6[0] + m6[3]; + imgcof[cof4_pos_to_subblock[1][i]][0][0] = m6[1] + m6[2]; + imgcof[cof4_pos_to_subblock[2][i]][0][0] = m6[1] - m6[2]; + imgcof[cof4_pos_to_subblock[3][i]][0][0]= m6[0] - m6[3]; + }//for (i=0;i<2;++i) + } + else + { + for(j=0;j<4;++j) + { + for(i=0;i<2;++i) + { + currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][0][0] = m3[i][j]; + } + } + } + + for(j = 0;j < p_Vid->mb_cr_size_y; j += BLOCK_SIZE) + { + for(i=0;i < p_Vid->mb_cr_size_x;i+=BLOCK_SIZE) + { + imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] = rshift_rnd_sf((imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] * InvLevelScale4x4[0][0]) << qp_per_uv_dc, 6); + } + } + } + }//for (ll=0;ll<3;ll+=2) + }//else if (dec_picture->chroma_format_idc == YUV422) + } + + //========================== CHROMA AC ============================ + //----------------------------------------------------------------- + // chroma AC coeff, all zero fram start_scan + if (cbp<=31) + { + } + 
else + { + { + int type; + currMB->is_intra_block = intra; + type = (intra ? SE_CHR_AC_INTRA : SE_CHR_AC_INTER); + + dP = &(currSlice->partArr[partMap[type]]); + + + if(currMB->is_lossless == FALSE) + { + for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8) + { + currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 )); + InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]]; + + for (b4 = 0; b4 < 4; ++b4) + { + int *scale = &InvLevelScale4x4[0][0]; + i = cofuv_blk_x[yuv][b8][b4]; + j = cofuv_blk_y[yuv][b8][b4]; + + currMB->subblock_y = subblk_offset_y[yuv][b8][b4]; + currMB->subblock_x = subblk_offset_x[yuv][b8][b4]; + + pos_scan_4x4 = &pos_scan4x4_1d[1]; + for(k = 0; k < 16;++k) + { + rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), CHROMA_AC); + + if (rl.level != 0) + { + byte position; + currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4]; + pos_scan_4x4 += rl.run; + position = *pos_scan_4x4++; + + ((int16_t *)currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]])[position] = rshift_rnd_sf((rl.level * scale[position])<<qp_per_uv[uv], 4); + } + else + break; + } //for(k=0;(k<16)&&(level!=0);++k) + } + } + } + else + { + for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8) + { + currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 )); + + for (b4=0; b4 < 4; ++b4) + { + i = cofuv_blk_x[yuv][b8][b4]; + j = cofuv_blk_y[yuv][b8][b4]; + + pos_scan_4x4 = &pos_scan4x4_1d[1]; + + currMB->subblock_y = subblk_offset_y[yuv][b8][b4]; + currMB->subblock_x = subblk_offset_x[yuv][b8][b4]; + + for(k=0;k<16;++k) + { + rl = readRunLevel_CABAC(currMB, &dP->de_cabac, CHROMA_AC); + + if (rl.level != 0) + { + currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4]; + pos_scan_4x4 += rl.run; + + ((int16_t *)currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]])[*pos_scan_4x4++] = rl.level; + } + else + break; + } + } + } + } //for (b4=0; b4 < 4; b4++) + } //for (b8=0; b8 < p_Vid->num_blk8x8_uv; 
b8++) + } //if (dec_picture->chroma_format_idc != YUV400) + } +} + +/*! +************************************************************************ +* \brief +* Get coded block pattern and coefficients (run/level) +* from the NAL +************************************************************************ +*/ +static void read_CBP_and_coeffs_from_NAL_CAVLC(Macroblock *currMB) +{ + int i,j,k; + int level; + int mb_nr = currMB->mbAddrX; + int cbp; + SyntaxElement currSE; + DataPartition *dP = NULL; + Slice *currSlice = currMB->p_Slice; + const byte *partMap = assignSE2partition[currSlice->dp_mode]; + int coef_ctr, i0, j0, b8; + int ll; + __declspec(align(32)) int levarr[16], runarr[16]; + int numcoeff; + + int qp_per, qp_rem; + VideoParameters *p_Vid = currMB->p_Vid; + int smb = ((p_Vid->type==SP_SLICE) && IS_INTER (currMB)) || (p_Vid->type == SI_SLICE && currMB->mb_type == SI4MB); + + int uv; + int qp_per_uv[2]; + int qp_rem_uv[2]; + + int intra = IS_INTRA (currMB); + int temp[4]; + + int b4; + StorablePicture *dec_picture = p_Vid->dec_picture; + int yuv = dec_picture->chroma_format_idc - 1; + int m6[4]; + + int need_transform_size_flag; + + int (*InvLevelScale4x4)[4] = NULL; + const int *InvLevelScale8x8 = NULL; + // select scan type + const byte (*pos_scan4x4)[2] = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN : FIELD_SCAN; + const byte *pos_scan_4x4 = pos_scan4x4[0]; + + // QPI + //init constants for every chroma qp offset + if (dec_picture->chroma_format_idc != YUV400) + { + for (i=0; i<2; ++i) + { + qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ]; + qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ]; + } + } + + // read CBP if not new intra mode + if (!IS_I16MB (currMB)) + { + //===== C B P ===== + //--------------------- + int type = (currMB->mb_type == I4MB || currMB->mb_type == SI4MB || currMB->mb_type == I8MB) + ? 
SE_CBP_INTRA + : SE_CBP_INTER; + + dP = &(currSlice->partArr[partMap[type]]); + + currSE.mapping = (currMB->mb_type == I4MB || currMB->mb_type == SI4MB || currMB->mb_type == I8MB) + ? currSlice->linfo_cbp_intra + : currSlice->linfo_cbp_inter; + + TRACE_STRING("coded_block_pattern"); + readSyntaxElement_UVLC(&currSE, dP); + currMB->cbp = cbp = currSE.value1; + + + //============= Transform size flag for INTER MBs ============= + //------------------------------------------------------------- + need_transform_size_flag = (((currMB->mb_type >= 1 && currMB->mb_type <= 3)|| + (IS_DIRECT(currMB) && p_Vid->active_sps->direct_8x8_inference_flag) || + (currMB->NoMbPartLessThan8x8Flag)) + && currMB->mb_type != I8MB && currMB->mb_type != I4MB + && (currMB->cbp&15) + && p_Vid->Transform8x8Mode); + + if (need_transform_size_flag) + { + dP = &(currSlice->partArr[partMap[SE_HEADER]]); + TRACE_STRING("transform_size_8x8_flag"); + + // read CAVLC transform_size_8x8_flag + currMB->luma_transform_size_8x8_flag = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1); + } + + //===== DQUANT ===== + //---------------------- + // Delta quant only if nonzero coeffs + if (cbp !=0) + { + read_delta_quant_CAVLC(&currSE, dP, currMB, partMap, (IS_INTER (currMB)) ? 
SE_DELTA_QUANT_INTER : SE_DELTA_QUANT_INTRA); + + if (currSlice->dp_mode) + { + if (IS_INTER (currMB) && currSlice->dpC_NotPresent ) + currMB->dpl_flag = 1; + + if( intra && currSlice->dpB_NotPresent ) + { + currMB->ei_flag = 1; + currMB->dpl_flag = 1; + } + + // check for prediction from neighbours + check_dp_neighbors (currMB); + if (currMB->dpl_flag) + { + cbp = 0; + currMB->cbp = cbp; + } + } + } + } + else + { + cbp = currMB->cbp; + } + + if (IS_I16MB (currMB)) // read DC coeffs for new intra modes + { + read_delta_quant_CAVLC(&currSE, dP, currMB, partMap, SE_DELTA_QUANT_INTRA); + + macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y); + + if (currSlice->dp_mode) + { + if (currSlice->dpB_NotPresent) + { + currMB->ei_flag = 1; + currMB->dpl_flag = 1; + } + check_dp_neighbors (currMB); + if (currMB->dpl_flag) + { + currMB->cbp = cbp = 0; + } + } + + if (!currMB->dpl_flag) + { + pos_scan_4x4 = pos_scan4x4[0]; + + readCoeff4x4_CAVLC(currMB, LUMA_INTRA16x16DC, 0, 0, levarr, runarr, &numcoeff); + + for(k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) // leave if level == 0 + { + pos_scan_4x4 += 2 * runarr[k]; + + i0 = (*pos_scan_4x4++); + j0 = (*pos_scan_4x4++); + + currSlice->cof4[0][cof4_pos_to_subblock[j0][i0]][0][0] = levarr[k];// add new intra DC coeff + } + } + + + if(currMB->is_lossless == FALSE) + itrans_2(currMB, (ColorPlane) p_Vid->colour_plane_id);// transform new intra DC + } + } + + update_qp(currMB, p_Vid->qp); + + qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ]; + qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ]; + + //init quant parameters for chroma + if (dec_picture->chroma_format_idc != YUV400) + { + for(i=0; i < 2; ++i) + { + qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ]; + qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ]; + } + } + + InvLevelScale4x4 = intra? 
currSlice->InvLevelScale4x4_Intra[p_Vid->colour_plane_id][qp_rem] : currSlice->InvLevelScale4x4_Inter[p_Vid->colour_plane_id][qp_rem]; + InvLevelScale8x8 = intra? currSlice->InvLevelScale8x8_Intra[p_Vid->colour_plane_id][qp_rem] : currSlice->InvLevelScale8x8_Inter[p_Vid->colour_plane_id][qp_rem]; + + // luma coefficients + if (cbp) + { + if (!currMB->luma_transform_size_8x8_flag) // 4x4 transform + { + readCompCoeff4x4MB_CAVLC(currMB, PLANE_Y, InvLevelScale4x4, qp_per, cbp, p_Vid->nz_coeff[mb_nr][PLANE_Y]); + } + else // 8x8 transform + { + readCompCoeff8x8MB_CAVLC(currMB, PLANE_Y, InvLevelScale8x8, qp_per, cbp, p_Vid->nz_coeff[mb_nr][PLANE_Y]); + } + } + else + { + memset(&p_Vid->nz_coeff[mb_nr][0][0][0], 0, BLOCK_SIZE * BLOCK_SIZE * sizeof(byte)); + } + + if ( p_Vid->active_sps->chroma_format_idc==YUV444 && !IS_INDEPENDENT(p_Vid) ) + { + for (uv = 0; uv < 2; ++uv ) + { + /*----------------------16x16DC Luma_Add----------------------*/ + if (IS_I16MB (currMB)) // read DC coeffs for new intra modes + { + macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y); + + if (uv == 0) + readCoeff4x4_CAVLC(currMB, CB_INTRA16x16DC, 0, 0, levarr, runarr, &numcoeff); + else + readCoeff4x4_CAVLC(currMB, CR_INTRA16x16DC, 0, 0, levarr, runarr, &numcoeff); + + coef_ctr=-1; + level = 1; // just to get inside the loop + + for(k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) // leave if level == 0 + { + coef_ctr += runarr[k] + 1; + + i0 = pos_scan4x4[coef_ctr][0]; + j0 = pos_scan4x4[coef_ctr][1]; + currSlice->cof4[uv + 1][cof4_pos_to_subblock[j0][i0]][0][0] = levarr[k];// add new intra DC coeff + } //if leavarr[k] + } //k loop + + if(currMB->is_lossless == FALSE) + { + itrans_2(currMB, (ColorPlane) (uv + 1)); // transform new intra DC + } + } //IS_I16MB + + update_qp(currMB, p_Vid->qp); + + qp_per = p_Vid->qp_per_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ]; + qp_rem = p_Vid->qp_rem_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ]; + + //init constants for 
every chroma qp offset + qp_per_uv[uv] = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ]; + qp_rem_uv[uv] = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ]; + + InvLevelScale4x4 = intra? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]]; + InvLevelScale8x8 = intra? currSlice->InvLevelScale8x8_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale8x8_Inter[uv + 1][qp_rem_uv[uv]]; + + if (!currMB->luma_transform_size_8x8_flag) // 4x4 transform + { + readCompCoeff4x4MB_CAVLC(currMB, (ColorPlane) (PLANE_U + uv), InvLevelScale4x4, qp_per_uv[uv], cbp, p_Vid->nz_coeff[mb_nr][PLANE_U + uv]); + } + else // 8x8 transform + { + readCompCoeff8x8MB_CAVLC(currMB, (ColorPlane) (PLANE_U + uv), InvLevelScale8x8, qp_per_uv[uv], cbp, p_Vid->nz_coeff[mb_nr][PLANE_U + uv]); + } + } + } //444 + else if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444)) + { + //========================== CHROMA DC ============================ + //----------------------------------------------------------------- + // chroma DC coeff + if(cbp>15) + { + if (dec_picture->chroma_format_idc == YUV420) + { + for (ll=0;ll<3;ll+=2) + { + uv = ll>>1; + + InvLevelScale4x4 = intra ? 
currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]]; + //===================== CHROMA DC YUV420 ====================== + memset(&currSlice->cofu[0], 0, 4 *sizeof(int)); + coef_ctr=-1; + + readCoeff4x4_CAVLC(currMB, CHROMA_DC, 0, 0, levarr, runarr, &numcoeff); + + for(k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) + { + currMB->cbp_blk[0] |= 0xf0000 << (ll<<1) ; + coef_ctr += runarr[k] + 1; + currSlice->cofu[coef_ctr]=levarr[k]; + } + } + + if (smb || (currMB->is_lossless == TRUE)) // check to see if MB type is SPred or SIntra4x4 + { + currSlice->cof4[uv + 1][0][0][0] = currSlice->cofu[0]; + currSlice->cof4[uv + 1][1][0][0] = currSlice->cofu[1]; + currSlice->cof4[uv + 1][2][0][0] = currSlice->cofu[2]; + currSlice->cof4[uv + 1][3][0][0] = currSlice->cofu[3]; + } + else + { + ihadamard2x2(currSlice->cofu, temp); + + currSlice->cof4[uv + 1][0][0][0] = (((temp[0] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + currSlice->cof4[uv + 1][1][0][0] = (((temp[1] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + currSlice->cof4[uv + 1][2][0][0] = (((temp[2] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + currSlice->cof4[uv + 1][3][0][0] = (((temp[3] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5); + } + } + } + else if (dec_picture->chroma_format_idc == YUV422) + { + for (ll=0;ll<3;ll+=2) + { + int (*InvLevelScale4x4)[4] = NULL; + uv = ll>>1; + { + h264_short_block_t *imgcof = currSlice->cof4[uv + 1]; + int m3[2][4] = {{0,0,0,0},{0,0,0,0}}; + int m4[2][4] = {{0,0,0,0},{0,0,0,0}}; + int qp_per_uv_dc = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only + int qp_rem_uv_dc = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only + if (intra) + InvLevelScale4x4 = currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv_dc]; + else + InvLevelScale4x4 = currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv_dc]; + + + 
//===================== CHROMA DC YUV422 ====================== + readCoeff4x4_CAVLC(currMB, CHROMA_DC, 0, 0, levarr, runarr, &numcoeff); + coef_ctr=-1; + level=1; + for(k = 0; k < numcoeff; ++k) + { + if (levarr[k] != 0) + { + currMB->cbp_blk[0] |= ((int64)0xff0000) << (ll<<2); + coef_ctr += runarr[k]+1; + i0 = SCAN_YUV422[coef_ctr][0]; + j0 = SCAN_YUV422[coef_ctr][1]; + + m3[i0][j0]=levarr[k]; + } + } + + // inverse CHROMA DC YUV422 transform + // horizontal + if(currMB->is_lossless == FALSE) + { + m4[0][0] = m3[0][0] + m3[1][0]; + m4[0][1] = m3[0][1] + m3[1][1]; + m4[0][2] = m3[0][2] + m3[1][2]; + m4[0][3] = m3[0][3] + m3[1][3]; + + m4[1][0] = m3[0][0] - m3[1][0]; + m4[1][1] = m3[0][1] - m3[1][1]; + m4[1][2] = m3[0][2] - m3[1][2]; + m4[1][3] = m3[0][3] - m3[1][3]; + + for (i = 0; i < 2; ++i) + { + m6[0] = m4[i][0] + m4[i][2]; + m6[1] = m4[i][0] - m4[i][2]; + m6[2] = m4[i][1] - m4[i][3]; + m6[3] = m4[i][1] + m4[i][3]; + + imgcof[cof4_pos_to_subblock[0][i]][0][0] = m6[0] + m6[3]; + imgcof[cof4_pos_to_subblock[1][i]][0][0] = m6[1] + m6[2]; + imgcof[cof4_pos_to_subblock[2][i]][0][0] = m6[1] - m6[2]; + imgcof[cof4_pos_to_subblock[3][i]][0][0] = m6[0] - m6[3]; + }//for (i=0;i<2;++i) + } + else + { + currSlice->cof4[uv + 1][0][0][0] = m3[0][0]; + currSlice->cof4[uv + 1][1][0][0] = m3[1][0]; + currSlice->cof4[uv + 1][2][0][0] = m3[0][1]; + currSlice->cof4[uv + 1][3][0][0] = m3[1][1]; + currSlice->cof4[uv + 1][8][0][0] = m3[0][2]; + currSlice->cof4[uv + 1][9][0][0] = m3[1][2]; + currSlice->cof4[uv + 1][10][0][0] = m3[0][3]; + currSlice->cof4[uv + 1][11][0][0] = m3[1][3]; + } + + for(j = 0;j < 16; j += BLOCK_SIZE) + { + for(i=0;i < 8;i+=BLOCK_SIZE) + { + imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] = rshift_rnd_sf((imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] * InvLevelScale4x4[0][0]) << qp_per_uv_dc, 6); + } + } + } + }//for (ll=0;ll<3;ll+=2) + }//else if (dec_picture->chroma_format_idc == YUV422) + } + + //========================== CHROMA AC 
============================ + //----------------------------------------------------------------- + // chroma AC coeff, all zero fram start_scan + if (cbp<=31) + { + memset(&p_Vid->nz_coeff [mb_nr ][1][0][0], 0, 2 * BLOCK_SIZE * BLOCK_SIZE * sizeof(byte)); + } + else + { + if(currMB->is_lossless == FALSE) + { + for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8) + { + currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 )); + InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]]; + + for (b4=0; b4 < 4; ++b4) + { + i = cofuv_blk_x[yuv][b8][b4]; + j = cofuv_blk_y[yuv][b8][b4]; + + readCoeff4x4_CAVLC(currMB, CHROMA_AC, i + 2*uv, j + 4, levarr, runarr, &numcoeff); + coef_ctr = 0; + + for(k = 0; k < numcoeff;++k) + { + if (levarr[k] != 0) + { + currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4]; + coef_ctr += runarr[k] + 1; + + i0=pos_scan4x4[coef_ctr][0]; + j0=pos_scan4x4[coef_ctr][1]; + + currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][j0][i0] = rshift_rnd_sf((levarr[k] * InvLevelScale4x4[j0][i0])<<qp_per_uv[uv], 4); + } + } + } + } + } + else + { + int type; + currMB->is_intra_block = IS_INTRA(currMB); + type = (currMB->is_intra_block ? SE_CHR_AC_INTRA : SE_CHR_AC_INTER); + + dP = &(currSlice->partArr[partMap[type]]); + currSE.mapping = linfo_levrun_inter; + + if(currMB->is_lossless == FALSE) + { + for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8) + { + currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 )); + InvLevelScale4x4 = intra ? 
currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]]; + + for (b4 = 0; b4 < 4; ++b4) + { + i = cofuv_blk_x[yuv][b8][b4]; + j = cofuv_blk_y[yuv][b8][b4]; + + currMB->subblock_y = subblk_offset_y[yuv][b8][b4]; + currMB->subblock_x = subblk_offset_x[yuv][b8][b4]; + + pos_scan_4x4 = pos_scan4x4[1]; + + for(k = 0; k < 16;k++) + { + readSyntaxElement_UVLC(&currSE, dP); + level = currSE.value1; + + if (level != 0) + { + currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4]; + pos_scan_4x4 += (currSE.value2 << 1); + + i0 = *pos_scan_4x4++; + j0 = *pos_scan_4x4++; + + currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][j0][i0] = rshift_rnd_sf((level * InvLevelScale4x4[j0][i0])<<qp_per_uv[uv], 4); + } + else + break; + } //for(k=0;(k<16)&&(level!=0);++k) + } + } + } + else + { + for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8) + { + currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 )); + + for (b4=0; b4 < 4; ++b4) + { + i = cofuv_blk_x[yuv][b8][b4]; + j = cofuv_blk_y[yuv][b8][b4]; + + pos_scan_4x4 = pos_scan4x4[1]; + + currMB->subblock_y = subblk_offset_y[yuv][b8][b4]; + currMB->subblock_x = subblk_offset_x[yuv][b8][b4]; + + for(k=0;k<16;++k) + { + readSyntaxElement_UVLC(&currSE, dP); + level = currSE.value1; + + if (level != 0) + { + currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4]; + pos_scan_4x4 += (currSE.value2 << 1); + + i0 = *pos_scan_4x4++; + j0 = *pos_scan_4x4++; + + currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][j0][i0] = level; + } + else + break; + } + } + } + } //for (b4=0; b4 < 4; b4++) + } //for (b8=0; b8 < p_Vid->num_blk8x8_uv; b8++) + } //if (dec_picture->chroma_format_idc != YUV400) + } +} + + +/*! 
+************************************************************************ +* \brief +* decode one color component in an I slice +************************************************************************ +*/ + +static int decode_one_component_i_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + //For residual DPCM + currMB->ipmode_DPCM = NO_INTRA_PMODE; + if(currMB->mb_type == IPCM) + mb_pred_ipcm(currMB); + else if (IS_I16MB (currMB)) // get prediction for INTRA_MB_16x16 + mb_pred_intra16x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I4MB) + mb_pred_intra4x4(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I8MB) + mb_pred_intra8x8(currMB, curr_plane, image, dec_picture); + + return 1; +} + +/*! +************************************************************************ +* \brief +* decode one color component for a p slice +************************************************************************ +*/ +static int decode_one_component_p_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + //For residual DPCM + currMB->ipmode_DPCM = NO_INTRA_PMODE; + if(currMB->mb_type == IPCM) + mb_pred_ipcm(currMB); + else if (IS_I16MB (currMB)) // get prediction for INTRA_MB_16x16 + mb_pred_intra16x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I4MB) + mb_pred_intra4x4(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I8MB) + mb_pred_intra8x8(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == PSKIP) + mb_pred_skip(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P16x16) + mb_pred_p_inter16x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P16x8) + mb_pred_p_inter16x8(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P8x16) + mb_pred_p_inter8x16(currMB, curr_plane, image, dec_picture); + else + 
mb_pred_p_inter8x8(currMB, curr_plane, image, dec_picture); + + return 1; +} + + +/*! +************************************************************************ +* \brief +* decode one color component for a sp slice +************************************************************************ +*/ +static int decode_one_component_sp_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + //For residual DPCM + currMB->ipmode_DPCM = NO_INTRA_PMODE; + + if(currMB->mb_type == IPCM) + mb_pred_ipcm(currMB); + else if (IS_I16MB (currMB)) // get prediction for INTRA_MB_16x16 + mb_pred_intra16x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I4MB) + mb_pred_intra4x4(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I8MB) + mb_pred_intra8x8(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == PSKIP) + mb_pred_sp_skip(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P16x16) + mb_pred_p_inter16x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P16x8) + mb_pred_p_inter16x8(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P8x16) + mb_pred_p_inter8x16(currMB, curr_plane, image, dec_picture); + else + mb_pred_p_inter8x8(currMB, curr_plane, image, dec_picture); + + return 1; +} + +static void set_chroma_vector(Macroblock *currMB, int *list_offset) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + if (!currSlice->mb_aff_frame_flag) + { + if(p_Vid->structure == TOP_FIELD) + { + int k,l; + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + if(p_Vid->structure != p_Vid->listX[l][k]->structure) + p_Vid->listX[l][k]->chroma_vector_adjustment = -2; + else + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + else if(p_Vid->structure == BOTTOM_FIELD) + { + int k,l; + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < 
p_Vid->listXsize[l]; k++) + { + if (p_Vid->structure != p_Vid->listX[l][k]->structure) + p_Vid->listX[l][k]->chroma_vector_adjustment = 2; + else + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + else + { + int k,l; + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + } + else + { + int mb_nr = (currMB->mbAddrX & 0x01); + int k,l; + + ////////////////////////// + // find out the correct list offsets + if (currMB->mb_field) + { + *list_offset = mb_nr ? 4 : 2; + + for (l = LIST_0 + *list_offset; l <= (LIST_1 + *list_offset); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + if(mb_nr == 0 && p_Vid->listX[l][k]->structure == BOTTOM_FIELD) + p_Vid->listX[l][k]->chroma_vector_adjustment = -2; + else if(mb_nr == 1 && p_Vid->listX[l][k]->structure == TOP_FIELD) + p_Vid->listX[l][k]->chroma_vector_adjustment = 2; + else + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + else + { + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + } + + p_Vid->max_mb_vmv_r = (p_Vid->structure != FRAME || (currSlice->mb_aff_frame_flag && currMB->mb_field)) ? 
p_Vid->max_vmv_r >> 1 : p_Vid->max_vmv_r; +} + + +static void mb_pred_b_dspatial(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + char l0_rFrame = -1, l1_rFrame = -1; + PicMotionParams *motion = &dec_picture->motion; + MotionVector pmvl0={0,0}, pmvl1={0,0}; + int k; + int block8x8; + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + + MotionParams *colocated = &currSlice->p_colocated->frame; + int list_offset = 0; + int pred_dir = 0; + + Boolean has_zero_partitions = FALSE; + h264_ref_t *ref_pic_num_l0, *ref_pic_num_l1; + + set_chroma_vector(currMB, &list_offset); + + if (currMB->mb_field) + { + if(currMB->mbAddrX & 0x01) + { + colocated = &currSlice->p_colocated->bottom; + } + else + { + colocated = &currSlice->p_colocated->top; + } + } + + prepare_direct_params(currMB, dec_picture, pmvl0, pmvl1, &l0_rFrame, &l1_rFrame); + + ref_pic_num_l0 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset]; + ref_pic_num_l1 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset]; + + if (p_Vid->active_sps->direct_8x8_inference_flag) + { + if (l0_rFrame >=0 && l1_rFrame >=0) + { + PicMotion **motion0 = &motion->motion[LIST_0][currMB->block_y]; + PicMotion **motion1 = &motion->motion[LIST_1][currMB->block_y]; + int block_x = currMB->block_x; + has_zero_partitions = TRUE; + pred_dir = 2; + if (p_Vid->listX[LIST_1 + list_offset][0]->is_long_term) + { // long term + //--- + memcpy(motion0[0][block_x + 0].mv, pmvl0, sizeof(MotionVector)); + motion0[0][block_x + 0].ref_idx = l0_rFrame; + memcpy(motion1[0][block_x + 0].mv, pmvl1, sizeof(MotionVector)); + motion1[0][block_x + 0].ref_idx = l1_rFrame; + motion0[0][block_x + 0].ref_pic_id = ref_pic_num_l0[(short)motion0[0][block_x + 0].ref_idx]; + motion1[0][block_x + 0].ref_pic_id = ref_pic_num_l1[(short)motion1[0][block_x + 0].ref_idx]; + 
memcpy(motion0[0][block_x + 1].mv, pmvl0, sizeof(MotionVector)); + motion0[0][block_x + 1].ref_idx = l0_rFrame; + memcpy(motion1[0][block_x + 1].mv, pmvl1, sizeof(MotionVector)); + motion1[0][block_x + 1].ref_idx = l1_rFrame; + motion0[0][block_x + 1].ref_pic_id = ref_pic_num_l0[(short)motion0[0][block_x + 1].ref_idx]; + motion1[0][block_x + 1].ref_pic_id = ref_pic_num_l1[(short)motion1[0][block_x + 1].ref_idx]; + memcpy(motion0[1][block_x + 0].mv, pmvl0, sizeof(MotionVector)); + motion0[1][block_x + 0].ref_idx = l0_rFrame; + memcpy(motion1[1][block_x + 0].mv, pmvl1, sizeof(MotionVector)); + motion1[1][block_x + 0].ref_idx = l1_rFrame; + motion0[1][block_x + 0].ref_pic_id = ref_pic_num_l0[(short)motion0[1][block_x + 0].ref_idx]; + motion1[1][block_x + 0].ref_pic_id = ref_pic_num_l1[(short)motion1[1][block_x + 0].ref_idx]; + memcpy(motion0[1][block_x + 1].mv, pmvl0, sizeof(MotionVector)); + motion0[1][block_x + 1].ref_idx = l0_rFrame; + memcpy(motion1[1][block_x + 1].mv, pmvl1, sizeof(MotionVector)); + motion1[1][block_x + 1].ref_idx = l1_rFrame; + motion0[1][block_x + 1].ref_pic_id = ref_pic_num_l0[(short)motion0[1][block_x + 1].ref_idx]; + motion1[1][block_x + 1].ref_pic_id = ref_pic_num_l1[(short)motion1[1][block_x + 1].ref_idx]; + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, 0, 0, list_offset, curr_mb_field); + //--- + memcpy(motion0[0][block_x + 2].mv, pmvl0, sizeof(MotionVector)); + motion0[0][block_x + 2].ref_idx = l0_rFrame; + memcpy(motion1[0][block_x + 2].mv, pmvl1, sizeof(MotionVector)); + motion1[0][block_x + 2].ref_idx = l1_rFrame; + motion0[0][block_x + 2].ref_pic_id = ref_pic_num_l0[(short)motion0[0][block_x + 2].ref_idx]; + motion1[0][block_x + 2].ref_pic_id = ref_pic_num_l1[(short)motion1[0][block_x + 2].ref_idx]; + memcpy(motion0[0][block_x + 3].mv, pmvl0, sizeof(MotionVector)); + motion0[0][block_x + 3].ref_idx = l0_rFrame; + memcpy(motion1[0][block_x + 3].mv, pmvl1, sizeof(MotionVector)); + motion1[0][block_x + 3].ref_idx = l1_rFrame; 
+ motion0[0][block_x + 3].ref_pic_id = ref_pic_num_l0[(short)motion0[0][block_x + 3].ref_idx]; + motion1[0][block_x + 3].ref_pic_id = ref_pic_num_l1[(short)motion1[0][block_x + 3].ref_idx]; + memcpy(motion0[1][block_x + 2].mv, pmvl0, sizeof(MotionVector)); + motion0[1][block_x + 2].ref_idx = l0_rFrame; + memcpy(motion1[1][block_x + 2].mv, pmvl1, sizeof(MotionVector)); + motion1[1][block_x + 2].ref_idx = l1_rFrame; + motion0[1][block_x + 2].ref_pic_id = ref_pic_num_l0[(short)motion0[1][block_x + 2].ref_idx]; + motion1[1][block_x + 2].ref_pic_id = ref_pic_num_l1[(short)motion1[1][block_x + 2].ref_idx]; + memcpy(motion0[1][block_x + 3].mv, pmvl0, sizeof(MotionVector)); + motion0[1][block_x + 3].ref_idx = l0_rFrame; + memcpy(motion1[1][block_x + 3].mv, pmvl1, sizeof(MotionVector)); + motion1[1][block_x + 3].ref_idx = l1_rFrame; + motion0[1][block_x + 3].ref_pic_id = ref_pic_num_l0[(short)motion0[1][block_x + 3].ref_idx]; + motion1[1][block_x + 3].ref_pic_id = ref_pic_num_l1[(short)motion1[1][block_x + 3].ref_idx]; + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, 2, 0, list_offset, curr_mb_field); + //--- + memcpy(motion0[2][block_x + 0].mv, pmvl0, sizeof(MotionVector)); + motion0[2][block_x + 0].ref_idx = l0_rFrame; + memcpy(motion1[2][block_x + 0].mv, pmvl1, sizeof(MotionVector)); + motion1[2][block_x + 0].ref_idx = l1_rFrame; + motion0[2][block_x + 0].ref_pic_id = ref_pic_num_l0[(short)motion0[2][block_x + 0].ref_idx]; + motion1[2][block_x + 0].ref_pic_id = ref_pic_num_l1[(short)motion1[2][block_x + 0].ref_idx]; + memcpy(motion0[2][block_x + 1].mv, pmvl0, sizeof(MotionVector)); + motion0[2][block_x + 1].ref_idx = l0_rFrame; + memcpy(motion1[2][block_x + 1].mv, pmvl1, sizeof(MotionVector)); + motion1[2][block_x + 1].ref_idx = l1_rFrame; + motion0[2][block_x + 1].ref_pic_id = ref_pic_num_l0[(short)motion0[2][block_x + 1].ref_idx]; + motion1[2][block_x + 1].ref_pic_id = ref_pic_num_l1[(short)motion1[2][block_x + 1].ref_idx]; + memcpy(motion0[3][block_x + 
0].mv, pmvl0, sizeof(MotionVector)); + motion0[3][block_x + 0].ref_idx = l0_rFrame; + memcpy(motion1[3][block_x + 0].mv, pmvl1, sizeof(MotionVector)); + motion1[3][block_x + 0].ref_idx = l1_rFrame; + motion0[3][block_x + 0].ref_pic_id = ref_pic_num_l0[(short)motion0[3][block_x + 0].ref_idx]; + motion1[3][block_x + 0].ref_pic_id = ref_pic_num_l1[(short)motion1[3][block_x + 0].ref_idx]; + memcpy(motion0[3][block_x + 1].mv, pmvl0, sizeof(MotionVector)); + motion0[3][block_x + 1].ref_idx = l0_rFrame; + memcpy(motion1[3][block_x + 1].mv, pmvl1, sizeof(MotionVector)); + motion1[3][block_x + 1].ref_idx = l1_rFrame; + motion0[3][block_x + 1].ref_pic_id = ref_pic_num_l0[(short)motion0[3][block_x + 1].ref_idx]; + motion1[3][block_x + 1].ref_pic_id = ref_pic_num_l1[(short)motion1[3][block_x + 1].ref_idx]; + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, 0, 2, list_offset, curr_mb_field); + //--- + memcpy(motion0[2][block_x + 2].mv, pmvl0, sizeof(MotionVector)); + motion0[2][block_x + 2].ref_idx = l0_rFrame; + memcpy(motion1[2][block_x + 2].mv, pmvl1, sizeof(MotionVector)); + motion1[2][block_x + 2].ref_idx = l1_rFrame; + motion0[2][block_x + 2].ref_pic_id = ref_pic_num_l0[(short)motion0[2][block_x + 2].ref_idx]; + motion1[2][block_x + 2].ref_pic_id = ref_pic_num_l1[(short)motion1[2][block_x + 2].ref_idx]; + memcpy(motion0[2][block_x + 3].mv, pmvl0, sizeof(MotionVector)); + motion0[2][block_x + 3].ref_idx = l0_rFrame; + memcpy(motion1[2][block_x + 3].mv, pmvl1, sizeof(MotionVector)); + motion1[2][block_x + 3].ref_idx = l1_rFrame; + motion0[2][block_x + 3].ref_pic_id = ref_pic_num_l0[(short)motion0[2][block_x + 3].ref_idx]; + motion1[2][block_x + 3].ref_pic_id = ref_pic_num_l1[(short)motion1[2][block_x + 3].ref_idx]; + memcpy(motion0[3][block_x + 2].mv, pmvl0, sizeof(MotionVector)); + motion0[3][block_x + 2].ref_idx = l0_rFrame; + memcpy(motion1[3][block_x + 2].mv, pmvl1, sizeof(MotionVector)); + motion1[3][block_x + 2].ref_idx = l1_rFrame; + motion0[3][block_x + 
2].ref_pic_id = ref_pic_num_l0[(short)motion0[3][block_x + 2].ref_idx]; + motion1[3][block_x + 2].ref_pic_id = ref_pic_num_l1[(short)motion1[3][block_x + 2].ref_idx]; + memcpy(motion0[3][block_x + 3].mv, pmvl0, sizeof(MotionVector)); + motion0[3][block_x + 3].ref_idx = l0_rFrame; + memcpy(motion1[3][block_x + 3].mv, pmvl1, sizeof(MotionVector)); + motion1[3][block_x + 3].ref_idx = l1_rFrame; + motion0[3][block_x + 3].ref_pic_id = ref_pic_num_l0[(short)motion0[3][block_x + 3].ref_idx]; + motion1[3][block_x + 3].ref_pic_id = ref_pic_num_l1[(short)motion1[3][block_x + 3].ref_idx]; + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, 2, 2, list_offset, curr_mb_field); + } + else + { // not long term + const byte **colocated_moving_block = &colocated->moving_block[currMB->block_y_aff]; + for (block8x8 = 0; block8x8 < 4; block8x8++) + { + int k_start = (block8x8 << 2); + for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + int i4 = currMB->block_x + i; + + //===== DIRECT PREDICTION ===== + if (!l0_rFrame && !colocated_moving_block[j][i4]) + { + motion0[j][i4].mv[0] = 0; + motion0[j][i4].mv[1] = 0; + motion0[j][i4].ref_idx = 0; + } + else + { + motion0[j][i4].mv[0] = pmvl0[0]; + motion0[j][i4].mv[1] = pmvl0[1]; + motion0[j][i4].ref_idx = l0_rFrame; + } + + if (l1_rFrame == 0 && !colocated_moving_block[j][i4]) + { + motion1[j][i4].mv[0] = 0; + motion1[j][i4].mv[1] = 0; + motion1[j][i4].ref_idx = 0; + } + else + { + motion1[j][i4].mv[0] = pmvl1[0]; + motion1[j][i4].mv[1] = pmvl1[1]; + motion1[j][i4].ref_idx = l1_rFrame; + } + + motion0[j][i4].ref_pic_id = ref_pic_num_l0[(short)motion0[j][i4].ref_idx]; + motion1[j][i4].ref_pic_id = ref_pic_num_l1[(short)motion1[j][i4].ref_idx]; + } + + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, (decode_block_scan[k_start] & 3), ((decode_block_scan[k_start] >> 2) & 3), list_offset, curr_mb_field); + } + } + } + else + { + for 
(block8x8 = 0; block8x8 < 4; block8x8++) + { + int k_start = (block8x8 << 2); + for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int j6 = currMB->block_y_aff + j; + + //printf("%d %d\n", i, j); + + //===== DIRECT PREDICTION ===== + + if (l0_rFrame >=0) + { + if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + has_zero_partitions = TRUE; + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + motion->motion[LIST_0][j4][i4].ref_idx = 0; + } + else + { + has_zero_partitions = TRUE; + motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0]; + motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1]; + motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame; + } + } + else + { + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + motion->motion[LIST_0][j4][i4].ref_idx = -1; + } + + if (l1_rFrame >=0) + { + if (l1_rFrame == 0 && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + has_zero_partitions = TRUE; + motion->motion[LIST_1][j4][i4].mv[0] = 0; + motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + else + { + has_zero_partitions = TRUE; + motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0]; + motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1]; + motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame; + } + } + else + { + motion->motion[LIST_1][j4][i4].mv[0] = 0; + motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = -1; + } + + if (l1_rFrame == -1) + pred_dir = 0; + else if (l0_rFrame == -1) + pred_dir = 1; + else + pred_dir = 2; + + if (l0_rFrame < 0 && l1_rFrame < 0) + { + motion->motion[LIST_0][j4][i4].ref_idx = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + pred_dir = 2; + } + + 
motion->motion[LIST_0][j4][i4].ref_pic_id = ref_pic_num_l0[(short)motion->motion[LIST_0][j4][i4].ref_idx]; + motion->motion[LIST_1][j4][i4].ref_pic_id = ref_pic_num_l1[(short)motion->motion[LIST_1][j4][i4].ref_idx]; + } + + if (has_zero_partitions == TRUE) + { + int i = (decode_block_scan[k_start] & 3); + int j = ((decode_block_scan[k_start] >> 2) & 3); + + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field); + } + } + } + } + else + { + for (block8x8 = 0; block8x8 < 4; block8x8++) + { + int k_start = (block8x8 << 2); + int k_end = k_start + BLOCK_MULTIPLE; + + for (k = k_start; k < k_end; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int j6 = currMB->block_y_aff + j; + + //===== DIRECT PREDICTION ===== + + if (l0_rFrame >=0) + { + if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + has_zero_partitions = TRUE; + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + motion->motion[LIST_0][j4][i4].ref_idx = 0; + } + else + { + has_zero_partitions = TRUE; + motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0]; + motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1]; + motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame; + } + } + else + { + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + motion->motion[LIST_0][j4][i4].ref_idx = -1; + } + + if (l1_rFrame >=0) + { + if (l1_rFrame == 0 && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + has_zero_partitions = TRUE; + motion->motion[LIST_1][j4][i4].mv[0] = 0; + motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + else + { + has_zero_partitions = TRUE; + motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0]; + motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1]; + 
motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame; + } + } + else + { + motion->motion[LIST_1][j4][i4].mv[0] = 0; + motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = -1; + } + + if (l0_rFrame < 0 && l1_rFrame < 0) + { + motion->motion[LIST_0][j4][i4].ref_idx = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + + if (l1_rFrame == -1) + { + if (l0_rFrame == -1) + pred_dir = 2; + else + pred_dir = 0; + } + else if (l0_rFrame == -1) + { + pred_dir = 1; + } + else + pred_dir = 2; + + motion->motion[LIST_0][j4][i4].ref_pic_id = ref_pic_num_l0[(short)motion->motion[LIST_0][j4][i4].ref_idx]; + motion->motion[LIST_1][j4][i4].ref_pic_id = ref_pic_num_l1[(short)motion->motion[LIST_1][j4][i4].ref_idx]; + } + + if (has_zero_partitions == TRUE) + { + for (k = k_start; k < k_end; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, BLOCK_SIZE, BLOCK_SIZE, curr_mb_field); + } + } + } + } + + if (has_zero_partitions == FALSE) + { + perform_mc16x16(currMB, curr_plane, dec_picture, pred_dir, list_offset, curr_mb_field); + } + + if (currMB->cbp == 0) + { + opt_copy_image_data_16x16_stride(image, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[curr_plane]); + + if (dec_picture->chroma_format_idc == YUV420) + { + copy_image_data_8x8_stride(dec_picture->imgUV[0], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[1]); + copy_image_data_8x8_stride(dec_picture->imgUV[1], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[2]); + } + else if (dec_picture->chroma_format_idc == YUV422) + { + copy_image_data_stride(dec_picture->imgUV[0], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[1], 8, 16); + copy_image_data_stride(dec_picture->imgUV[1], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[2], 8, 16); + } + } + else + iTransform(currMB, curr_plane, 0); +} + + + +/*! 
+************************************************************************ +* \brief +* decode one color component for a b slice +************************************************************************ +*/ + +static int decode_one_component_b_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + //For residual DPCM + currMB->ipmode_DPCM = NO_INTRA_PMODE; + + if(currMB->mb_type == IPCM) + mb_pred_ipcm(currMB); + else if (IS_I16MB (currMB)) // get prediction for INTRA_MB_16x16 + mb_pred_intra16x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I4MB) + mb_pred_intra4x4(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == I8MB) + mb_pred_intra8x8(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P16x16) + mb_pred_p_inter16x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P16x8) + mb_pred_p_inter16x8(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == P8x16) + mb_pred_p_inter8x16(currMB, curr_plane, image, dec_picture); + else if (currMB->mb_type == BSKIP_DIRECT) + { + if (currMB->p_Slice->direct_spatial_mv_pred_flag == 0) + mb_pred_b_dtemporal (currMB, curr_plane, image, dec_picture); + else + mb_pred_b_dspatial (currMB, curr_plane, image, dec_picture); + } + else + mb_pred_b_inter8x8 (currMB, curr_plane, image, dec_picture); + + return 1; +} + +/*! 
+************************************************************************ +* \brief +* decode one macroblock +************************************************************************ +*/ + +int decode_one_macroblock(Macroblock *currMB, StorablePicture *dec_picture) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + // luma decoding ************************************************** + currSlice->decode_one_component(currMB, PLANE_Y, dec_picture->imgY, dec_picture); + + if ((p_Vid->active_sps->chroma_format_idc==YUV444)&&(!IS_INDEPENDENT(p_Vid))) + { + currSlice->decode_one_component(currMB, PLANE_U, dec_picture->imgUV[0], dec_picture); + currSlice->decode_one_component(currMB, PLANE_V, dec_picture->imgUV[1], dec_picture); + } + return 0; +} + + +/*! +************************************************************************ +* \brief +* change target plane +* for 4:4:4 Independent mode +************************************************************************ +*/ +void change_plane_JV( VideoParameters *p_Vid, int nplane ) +{ + Slice *currSlice = p_Vid->currentSlice; + p_Vid->colour_plane_id = nplane; + p_Vid->mb_data = p_Vid->mb_data_JV[nplane]; + p_Vid->dec_picture = p_Vid->dec_picture_JV[nplane]; + currSlice->p_colocated = currSlice->Co_located_JV[nplane]; +} + +/*! 
+************************************************************************ +* \brief +* make frame picture from each plane data +* for 4:4:4 Independent mode +************************************************************************ +*/ +void make_frame_picture_JV(VideoParameters *p_Vid) +{ + int uv, line; + int nsize; + int nplane; + p_Vid->dec_picture = p_Vid->dec_picture_JV[0]; + + // Copy Storable Params + for( nplane=0; nplane<MAX_PLANE; nplane++ ) + { + copy_storable_param_JV( p_Vid, &p_Vid->dec_picture->JVmotion[nplane], &p_Vid->dec_picture_JV[nplane]->motion ); + } + + // This could be done with pointers and seems not necessary + for( uv=0; uv<2; uv++ ) + { + for( line=0; line<p_Vid->height; line++ ) + { + nsize = sizeof(imgpel) * p_Vid->width; + memcpy( p_Vid->dec_picture->imgUV[uv]->img[line], p_Vid->dec_picture_JV[uv+1]->imgY->img[line], nsize ); + } + free_storable_picture(p_Vid, p_Vid->dec_picture_JV[uv+1]); + } +} + + diff --git a/Src/h264dec/ldecod/src/mb_access.c b/Src/h264dec/ldecod/src/mb_access.c new file mode 100644 index 00000000..70f3aee2 --- /dev/null +++ b/Src/h264dec/ldecod/src/mb_access.c @@ -0,0 +1,3388 @@ + +/*! +************************************************************************************* +* \file mb_access.c +* +* \brief +* Functions for macroblock neighborhoods +* +* \author +* Main contributors (see contributors.h for copyright, address and affiliation details) +* - Karsten Sühring <suehring@hhi.de> +************************************************************************************* +*/ + +#include "global.h" +#include "mbuffer.h" +#include "mb_access.h" + +/*! 
+************************************************************************ +* \brief +* returns 1 if the macroblock at the given address is available +************************************************************************ +*/ +Boolean mb_is_available(int mbAddr, const Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + if ((mbAddr < 0) || (mbAddr > ((int)p_Vid->dec_picture->PicSizeInMbs - 1))) + return FALSE; + + // the following line checks both: slice number and if the mb has been decoded + if (!p_Vid->DeblockCall) + { + if (p_Vid->mb_data[mbAddr].slice_nr != currMB->slice_nr) + return FALSE; + } + + return TRUE; +} + + +/*! +************************************************************************ +* \brief +* Checks the availability of neighboring macroblocks of +* the current macroblock for prediction and context determination; +************************************************************************ +*/ +void CheckAvailabilityOfNeighbors(Macroblock *currMB) +{ + VideoParameters *p_Vid = currMB->p_Vid; + const int mb_nr = currMB->mbAddrX; + + // mark all neighbors as unavailable + currMB->mb_up = NULL; + currMB->mb_left = NULL; + + if (p_Vid->dec_picture->mb_aff_frame_flag) + { + int cur_mb_pair = mb_nr >> 1; + currMB->mb_addr_left = 2 * (cur_mb_pair - 1); + currMB->mb_addr_up = 2 * (cur_mb_pair - p_Vid->dec_picture->PicWidthInMbs); + currMB->mb_addr_upper_right = 2 * (cur_mb_pair - p_Vid->dec_picture->PicWidthInMbs + 1); + currMB->mb_addr_upper_left = 2 * (cur_mb_pair - p_Vid->dec_picture->PicWidthInMbs - 1); + + currMB->mb_avail_left = (Boolean) (mb_is_available(currMB->mb_addr_left, currMB) && ((p_Vid->PicPos[cur_mb_pair ][0])!=0)); + currMB->mb_avail_up = (Boolean) (mb_is_available(currMB->mb_addr_up, currMB)); + currMB->mb_avail_upper_right = (Boolean) (mb_is_available(currMB->mb_addr_upper_right, currMB) && ((p_Vid->PicPos[cur_mb_pair + 1][0])!=0)); + currMB->mb_avail_upper_left = (Boolean) (mb_is_available(currMB->mb_addr_upper_left, 
currMB) && ((p_Vid->PicPos[cur_mb_pair ][0])!=0)); + } + else + { + currMB->mb_addr_left = mb_nr - 1; // left? + currMB->mb_addr_up = mb_nr - p_Vid->dec_picture->PicWidthInMbs; // up? + currMB->mb_addr_upper_right = mb_nr - p_Vid->dec_picture->PicWidthInMbs + 1; // upper right? + currMB->mb_addr_upper_left = mb_nr - p_Vid->dec_picture->PicWidthInMbs - 1; // upper left? + + currMB->mb_avail_left = (Boolean) (mb_is_available(currMB->mb_addr_left, currMB) && ((p_Vid->PicPos[mb_nr ][0])!=0)); + currMB->mb_avail_up = (Boolean) (mb_is_available(currMB->mb_addr_up, currMB)); + currMB->mb_avail_upper_right = (Boolean) (mb_is_available(currMB->mb_addr_upper_right, currMB) && ((p_Vid->PicPos[mb_nr + 1][0])!=0)); + currMB->mb_avail_upper_left = (Boolean) (mb_is_available(currMB->mb_addr_upper_left, currMB) && ((p_Vid->PicPos[mb_nr ][0])!=0)); + } + + if (currMB->mb_avail_left) currMB->mb_left = &(p_Vid->mb_data[currMB->mb_addr_left]); + if (currMB->mb_avail_up) currMB->mb_up = &(p_Vid->mb_data[currMB->mb_addr_up]); +} + + +/*! +************************************************************************ +* \brief +* returns the x and y macroblock coordinates for a given MbAddress +************************************************************************ +*/ +void get_mb_block_pos_normal (const h264_pic_position *PicPos, int mb_addr, short *x, short *y) +{ + *x = (short) PicPos[ mb_addr ][0]; + *y = (short) PicPos[ mb_addr ][1]; +} + +/*! +************************************************************************ +* \brief +* returns the x and y macroblock coordinates for a given MbAddress +* for mbaff type slices +************************************************************************ +*/ +void get_mb_block_pos_mbaff (const h264_pic_position *PicPos, int mb_addr, short *x, short *y) +{ + *x = (short) PicPos[mb_addr>>1][0]; + *y = (short) ((PicPos[mb_addr>>1][1] << 1) + (mb_addr & 0x01)); +} + +/*! 
+************************************************************************ +* \brief +* returns the x and y sample coordinates for a given MbAddress +************************************************************************ +*/ +void get_mb_pos (VideoParameters *p_Vid, int mb_addr, const int mb_size[2], short *x, short *y) +{ + p_Vid->get_mb_block_pos(p_Vid->PicPos, mb_addr, x, y); + + (*x) = (short) ((*x) * mb_size[0]); + (*y) = (short) ((*y) * mb_size[1]); +} + + +/*! +************************************************************************ +* \brief +* get neighbouring positions for non-aff coding +* \param currMB +* current macroblock +* \param xN +* input x position +* \param yN +* input y position +* \param mb_size +* Macroblock size in pixel (according to luma or chroma MB access) +* \param pix +* returns position informations +************************************************************************ +*/ +void getNonAffNeighbour(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + if (xN < 0) + { + if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + } + else if (yN < maxH) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->available = FALSE; + } + } + else if (xN < maxW) + { + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else if (yN < maxH) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (xN & (maxW - 1)); + pix->pos_x 
= (short) (pix->x + *(CurPos++) * maxW); + pix->y = (short) (yN & (maxH - 1)); + pix->pos_y = (short) (pix->y + *CurPos * maxH); + } +} + +void getNonAffNeighbourXP_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + if (xN < 0) + { + if (yN < maxH) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->available = FALSE; + } + } + else if (xN < maxW) + { + if (yN < maxH) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else + { + pix->available = FALSE; + } + + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yN & (maxH - 1)); + } +} + +void getNonAffNeighbourPX_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + if (xN < maxW) + { + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else if (yN < maxH) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + } + else + { + pix->available = FALSE; + } + + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yN & (maxH - 1)); + } +} + +void getNonAffNeighbourLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + if (xN < 0) + { + if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + } + else if (yN < 16) + { + pix->mb_addr = currMB->mb_addr_left; + 
pix->available = currMB->mb_avail_left; + } + else + { + pix->available = FALSE; + } + } + else if (xN < 16) + { + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else if (yN < 16) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (xN & 15); + pix->pos_x = (short) (pix->x + *(CurPos++) * 16); + pix->y = (short) (yN & 15); + pix->pos_y = (short) (pix->y + *CurPos * 16); + } +} + +void getNonAffNeighbourXPLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + + if (xN < 0) + { + if (yN < 16) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->available = FALSE; + } + } + else if (xN < 16) + { + if (yN < 16) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (xN & 15); + pix->pos_x = (short) (pix->x + *(CurPos++) * 16); + pix->y = (short) (yN & 15); + pix->pos_y = (short) (pix->y + *CurPos * 16); + } +} + + +void getNonAffNeighbourXPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + assert(!p_Vid->DeblockCall); + if (xN < 0) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + + if (pix->available) + { + const int *CurPos = 
&p_Vid->PicPos[ pix->mb_addr ][0]; + pix->pos_x = (short) ((xN & 15) + *(CurPos++) * 16); + pix->pos_y = (short) (yN + *CurPos * 16); + } +} + +void getNonAffNeighbourPPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0, xN >= 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + assert(!p_Vid->DeblockCall); + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->pos_x = (short) ((xN & 15) + *(CurPos++) * 16); + pix->pos_y = (short) (yN + *CurPos * 16); + } +} + + +void getNonAffNeighbourXPLumaNB_NoPos(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0 +{ + assert(!currMB->p_Vid->DeblockCall); + if (xN < 0) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + + + if (pix->available) + { + pix->x = (short) (xN & 15); + pix->y = (short) (yN); + } +} + +void getNonAffNeighbourNPLumaNB(const Macroblock *currMB, int yN, PixelPos *pix) // xN = -1, yN >= 0 && yN < 16 +{ + VideoParameters *p_Vid = currMB->p_Vid; + + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + //pix->x = (short) (-1 & 15); + pix->pos_x = (short) ((-1 & 15) + *(CurPos++) * 16); + pix->y = (short) (yN); + pix->pos_y = (short) (yN + *CurPos * 16); + } +} + + +void getNonAffNeighbourPXLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // xN is >= 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + + if (xN < 16) + { + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else if (yN < 16) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = 
currMB->mb_avail_upper_right; + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (xN & 15); + pix->pos_x = (short) (pix->x + *(CurPos++) * 16); + pix->y = (short) (yN & 15); + pix->pos_y = (short) (pix->y + *CurPos * 16); + } +} + +void getNonAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // xN is >= 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + assert(!p_Vid->DeblockCall); + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->pos_x = (short) (xN + *(CurPos++) * 16); + pix->pos_y = (short) ((yN & 15) + *CurPos * 16); + } +} + +void getNonAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix) // xN is >= 0 +{ + assert(!currMB->p_Vid->DeblockCall); + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + + if (pix->available) + { + pix->y = (short) (yN & 15); + } +} + +void getNonAffNeighbourN0Luma(const Macroblock *currMB, PixelPos *pix) // xN = -1, yN = 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + assert(p_Vid->DeblockCall == 0); + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (-1 & 15); + pix->pos_x = (short) (pix->x + *(CurPos++) * 16); + pix->y = 0; + pix->pos_y = (short) (*CurPos * 16); + } +} + + +void getNonAffNeighbourN0(const Macroblock *currMB, const int mb_size[2], PixelPos *pix) // xN = -1, yN = 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + assert(maxH != 0); + 
assert(p_Vid->DeblockCall == 0); + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (-1 & (maxW - 1)); + pix->pos_x = (short) (pix->x + *(CurPos++) * maxW); + pix->y = 0; + pix->pos_y = (short) (*CurPos * maxH); + } +} + +void getNonAffNeighbour0N(const Macroblock *currMB, const int mb_size[2], PixelPos *pix) // xN = 0, yN = -1 +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + assert(maxW != 0); + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = 0; + pix->pos_x = (short) (*(CurPos++) * maxW); + pix->y = (short) (-1 & (maxH - 1)); + pix->pos_y = (short) (pix->y + *CurPos * maxH); + } +} + +void getNonAffNeighbour0NLuma(const Macroblock *currMB, PixelPos *pix) // xN = 0, yN = -1 +{ + VideoParameters *p_Vid = currMB->p_Vid; + + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = 0; + pix->pos_x = (short) (*(CurPos++) * 16); + pix->y = (short) (-1 & (16 - 1)); + pix->pos_y = (short) (pix->y + *CurPos * 16); + } +} + + +void getNonAffNeighbourNX(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN = -1, yN full range +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + + if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + } + else if (yN < maxH) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (-1 & (maxW - 1)); + 
pix->pos_x = (short) (pix->x + *(CurPos++) * maxW); + pix->y = (short) (yN & (maxH - 1)); + pix->pos_y = (short) (pix->y + *CurPos * maxH); + } +} + +void getNonAffNeighbourNXLuma(const Macroblock *currMB, int yN, PixelPos *pix) // xN = -1, yN full range +{ + VideoParameters *p_Vid = currMB->p_Vid; + + + if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + } + else if (yN < 16) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (-1 & (16 - 1)); + pix->pos_x = (short) (pix->x + *(CurPos++) * 16); + pix->y = (short) (yN & (16 - 1)); + pix->pos_y = (short) (pix->y + *CurPos * 16); + } +} + +void getNonAffNeighbourNP(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN < 0, yN >= 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + if (yN < maxH) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->pos_x = (short) ((-1 & (maxW - 1)) + *(CurPos++) * maxW); + pix->pos_y = (short) (yN + *CurPos * maxH); + } + } + else + { + pix->available = FALSE; + } +} + +void getNonAffNeighbourNPChromaNB(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN < 0, yN >= 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (pix->available) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->pos_x = (short) ((-1 & (maxW - 1)) + *(CurPos++) * maxW); + pix->pos_y = (short) (yN + *CurPos * maxH); + } +} + +void getNonAffNeighbour0X(const Macroblock *currMB, int yN, const 
int mb_size[2], PixelPos *pix) // xN is guaranteed to be zero +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + if (0 < maxW) + { + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else if (yN < maxH) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else if (yN < 0) + { + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = 0; + pix->pos_x = (short) (*(CurPos++) * maxW); + pix->y = (short) (yN & (maxH - 1)); + pix->pos_y = (short) (pix->y + *CurPos * maxH); + } +} + +void getNonAffNeighbour0XLuma(const Macroblock *currMB, int yN, PixelPos *pix) // xN is guaranteed to be zero +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = 16, maxH = 16; + + if (yN<0) + { + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + } + else if (yN < 16) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = 0; + pix->pos_x = (short) (*(CurPos++) * maxW); + pix->y = (short) (yN & (maxH - 1)); + pix->pos_y = (short) (pix->y + *CurPos * maxH); + } +} + +void getNonAffNeighbourX0(const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix) // xN is full range, yN is 0 +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW = mb_size[0], maxH = mb_size[1]; + + if (xN < 0) + { + if (0 < maxH) + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + } + else + { + pix->available = FALSE; + } + } + else if (xN < maxW) + { + if (0 < maxH) + { + pix->mb_addr = 
currMB->mbAddrX; + pix->available = TRUE; + } + else + { + pix->available = FALSE; + } + } + else + { + pix->available = FALSE; + } + + if (pix->available || p_Vid->DeblockCall && pix->mb_addr && p_Vid) + { + const int *CurPos = &p_Vid->PicPos[ pix->mb_addr ][0]; + pix->x = (short) (xN & (maxW - 1)); + pix->pos_x = (short) (pix->x + *(CurPos++) * maxW); + pix->y = 0; + pix->pos_y = (short) (*CurPos * maxH); + } +} + +/*! +************************************************************************ +* \brief +* get neighboring positions for aff coding +* \param currMB +* current macroblock +* \param xN +* input x position +* \param yN +* input y position +* \param mb_size +* Macroblock size in pixel (according to luma or chroma MB access) +* \param pix +* returns position informations +************************************************************************ +*/ +void getAffNeighbour(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW, maxH; + int yM = -1; + + maxW = mb_size[0]; + maxH = mb_size[1]; + + // initialize to "not available" + pix->available = FALSE; + + if(yN > (maxH - 1)) + { + return; + } + if (xN > (maxW - 1) && yN >= 0 && yN < maxH) + { + return; + } + + if (xN < 0) + { + if (yN < 0) + { + if(!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left + 1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)++; + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + if (currMB->mb_avail_upper_left) + { + 
if(!p_Vid->mb_data[currMB->mb_addr_upper_left].mb_field) + { + (pix->mb_addr)++; + yM = 2 * yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_left+1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + } + } + else + { // xN < 0 && yN >= 0 + if (yN <maxH) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = yN >> 1; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + (pix->mb_addr)++; + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = yN << 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) - maxH; + } + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = (yN << 1) + 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) + 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = yN; + } + } + } + } + } + } + } + else + { // xN >= 0 + if (xN >= 0 && xN < maxW) + { + if (yN<0) + { + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if 
the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = yN; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + if (currMB->mb_avail_up) + { + if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = currMB->mb_avail_up; + yM = yN; + } + } + } + else + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && p_Vid->DeblockCall == 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = TRUE; + yM = yN - 1; + } + + else if ((yN <maxH)) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + } + } + else + { // xN >= maxW + if(yN < 0) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + else + { + // bottom + pix->available = FALSE; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + if (currMB->mb_avail_upper_right) + { + if(!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + } + } + } + } + if (pix->available 
|| p_Vid->DeblockCall) + { + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yM & (maxH - 1)); + get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = pix->pos_x + pix->x; + pix->pos_y = pix->pos_y + pix->y; + } +} + +void getAffNeighbourNX(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW, maxH; + int yM = -1; + int xN = -1; + + maxW = mb_size[0]; + maxH = mb_size[1]; + + // initialize to "not available" + pix->available = FALSE; + + if(yN > (maxH - 1)) + { + return; + } + if (xN > (maxW - 1) && yN >= 0 && yN < maxH) + { + return; + } + + if (xN < 0) + { + if (yN < 0) + { + if(!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left + 1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)++; + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + if (currMB->mb_avail_upper_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_upper_left].mb_field) + { + (pix->mb_addr)++; + yM = 2 * yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_left+1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + } + } + else + { // xN < 0 && yN >= 0 + if (yN <maxH) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { 
+ (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = yN >> 1; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + (pix->mb_addr)++; + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = yN << 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) - maxH; + } + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = (yN << 1) + 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) + 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = yN; + } + } + } + } + } + } + } + else + { // xN >= 0 + if (xN >= 0 && xN < maxW) + { + if (yN<0) + { + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = yN; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + if (currMB->mb_avail_up) + { + 
if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = currMB->mb_avail_up; + yM = yN; + } + } + } + else + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && p_Vid->DeblockCall == 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = TRUE; + yM = yN - 1; + } + + else if ((yN <maxH)) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + } + } + else + { // xN >= maxW + if(yN < 0) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + else + { + // bottom + pix->available = FALSE; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + if (currMB->mb_avail_upper_right) + { + if(!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + } + } + } + } + if (pix->available || p_Vid->DeblockCall) + { + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yM & (maxH - 1)); + get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = pix->pos_x + pix->x; + pix->pos_y = pix->pos_y + pix->y; + } +} + +void getAffNeighbourNXLuma(const Macroblock *currMB, int yN, PixelPos *pix) +{ + const int mb_size[2]={16,16}; + getAffNeighbourNX(currMB, yN, mb_size, pix); +} +void getAffNeighbourN0(const Macroblock *currMB, const int mb_size[2], PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW, maxH; + int yM = -1; + 
int xN = -1; + int yN=0; + + maxW = mb_size[0]; + maxH = mb_size[1]; + + // initialize to "not available" + pix->available = FALSE; + + if(yN > (maxH - 1)) + { + return; + } + if (xN > (maxW - 1) && yN >= 0 && yN < maxH) + { + return; + } + + if (xN < 0) + { + if (yN < 0) + { + if(!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left + 1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)++; + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + if (currMB->mb_avail_upper_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_upper_left].mb_field) + { + (pix->mb_addr)++; + yM = 2 * yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_left+1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + } + } + else + { // xN < 0 && yN >= 0 + if (yN <maxH) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = yN >> 1; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + (pix->mb_addr)++; + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if 
((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = yN << 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) - maxH; + } + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = (yN << 1) + 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) + 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = yN; + } + } + } + } + } + } + } + else + { // xN >= 0 + if (xN >= 0 && xN < maxW) + { + if (yN<0) + { + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = yN; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + if (currMB->mb_avail_up) + { + if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = currMB->mb_avail_up; + yM = yN; + } + } + } + else + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && p_Vid->DeblockCall == 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + 
pix->available = TRUE; + yM = yN - 1; + } + + else if ((yN <maxH)) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + } + } + else + { // xN >= maxW + if(yN < 0) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + else + { + // bottom + pix->available = FALSE; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + if (currMB->mb_avail_upper_right) + { + if(!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + } + } + } + } + if (pix->available || p_Vid->DeblockCall) + { + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yM & (maxH - 1)); + get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = pix->pos_x + pix->x; + pix->pos_y = pix->pos_y + pix->y; + } +} + + +void getAffNeighbourLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + const int maxW=16, maxH=16; + int yM = -1; + + // initialize to "not available" + pix->available = FALSE; + + if(yN > (maxH - 1)) + { + return; + } + if (xN > (maxW - 1) && yN >= 0 && yN < maxH) + { + return; + } + + if (xN < 0) + { + if (yN < 0) + { + if(!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left + 1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + 
if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)++; + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_left; + pix->available = currMB->mb_avail_upper_left; + if (currMB->mb_avail_upper_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_upper_left].mb_field) + { + (pix->mb_addr)++; + yM = 2 * yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_left+1; + pix->available = currMB->mb_avail_upper_left; + yM = yN; + } + } + } + else + { // xN < 0 && yN >= 0 + if (yN <maxH) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = yN >> 1; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + (pix->mb_addr)++; + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = yN << 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) - maxH; + } + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = (yN << 1) + 1; + } + else + { + 
(pix->mb_addr)++; + yM = (yN << 1 ) + 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = yN; + } + } + } + } + } + } + } + else + { // xN >= 0 + if (xN >= 0 && xN < maxW) + { + if (yN<0) + { + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = yN; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + if (currMB->mb_avail_up) + { + if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = currMB->mb_avail_up; + yM = yN; + } + } + } + else + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && p_Vid->DeblockCall == 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = TRUE; + yM = yN - 1; + } + + else if (yN <maxH) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + } + } + else + { // xN >= maxW + if(yN < 0) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + else + { + // bottom + pix->available = FALSE; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_upper_right; + pix->available = currMB->mb_avail_upper_right; + if 
(currMB->mb_avail_upper_right) + { + if(!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_upper_right + 1; + pix->available = currMB->mb_avail_upper_right; + yM = yN; + } + } + } + } + } + if (pix->available || p_Vid->DeblockCall) + { + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yM & (maxH - 1)); + get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = 16*pix->pos_x + pix->x; + pix->pos_y = 16*pix->pos_y + pix->y; + } +} + + +void getAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) +{ // xN >= 0, yN < 16, xN < 16 + VideoParameters *p_Vid = currMB->p_Vid; + const int maxW=16, maxH=16; + int yM = -1; + + // initialize to "not available" + pix->available = FALSE; + + if (yN<0) + { + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = yN; + } + } + else + { + // field + pix->available = currMB->mb_avail_up; + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + + if (currMB->mb_avail_up) + { + if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + + yM = yN; + } + } + } + else + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && p_Vid->DeblockCall 
== 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = TRUE; + yM = yN - 1; + } + else + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + } + + if (pix->available || p_Vid->DeblockCall) + { + pix->x = (short) (xN); + pix->y = (short) (yM & (maxH - 1)); + get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = 16*pix->pos_x + pix->x; + pix->pos_y = 16*pix->pos_y + pix->y; + } +} + +void getAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix) +{ // xN >= 0, yN < 16, xN < 16, DeblockCall == 0 + VideoParameters *p_Vid = currMB->p_Vid; + int yM = -1; + + // initialize to "not available" + pix->available = FALSE; + + if (yN<0) + { + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = yN; + } + } + else + { + // field + pix->available = currMB->mb_avail_up; + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + + if (currMB->mb_avail_up) + { + if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + + yM = yN; + } + } + } + else + { + // yN >=0 + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + + if (pix->available) + { + pix->y = (short) (yM & 15); + } +} + + +void getAffNeighbourXPLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix) +{ // yN >= 0 + VideoParameters *p_Vid = currMB->p_Vid; + const int maxW=16, maxH=16; + int yM = -1; + + // initialize to "not 
available" + pix->available = FALSE; + + if(yN > (maxH - 1)) + { + return; + } + if (xN > (maxW - 1) && yN < maxH) + { + return; + } + + if (xN < 0) + { + if (!currMB->mb_field) + { + // frame + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = yN >> 1; + } + } + } + else + { + // bottom + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + (pix->mb_addr)++; + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = yN << 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) - maxH; + } + } + else + { + yM = yN; + } + } + } + else + { + // bottom + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = (yN << 1) + 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) + 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = yN; + } + } + } + } + } + else if (xN < maxW) + { // xN >= 0 + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && p_Vid->DeblockCall == 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = TRUE; + yM = yN - 1; + } + + else if (yN <maxH) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + } + + if (pix->available || p_Vid->DeblockCall) + { + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yM & (maxH - 1)); + get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), 
&(pix->pos_y)); + pix->pos_x = 16*pix->pos_x + pix->x; + pix->pos_y = 16*pix->pos_y + pix->y; + } +} + + +void getAffNeighbourPPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + // xN >= 0 + // yN >=0 + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + + pix->x = (short) (xN & (16 - 1)); + pix->y = (short) (yN & (16 - 1)); + get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = 16*pix->pos_x + pix->x; + pix->pos_y = 16*pix->pos_y + pix->y; +} + +void getAffNeighbourNPLuma(const Macroblock *currMB, int yN, PixelPos *pix) +{ // yN >= 0 + VideoParameters *p_Vid = currMB->p_Vid; + const int maxW=16, maxH=16; + int yM = -1; + + // initialize to "not available" + pix->available = FALSE; + + if(yN > (maxH - 1)) + { + return; + } + + if (yN <maxH) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = yN >> 1; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + (pix->mb_addr)++; + yM = yN; + } + else + { + (pix->mb_addr)+= ((yN & 0x01) != 0); + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = yN << 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) - maxH; + } + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + 
pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (yN < (maxH >> 1)) + { + yM = (yN << 1) + 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) + 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = yN; + } + } + } + } + } + + + if (pix->available || p_Vid->DeblockCall) + { + pix->x = (short) (-1 & (maxW - 1)); + pix->y = (short) (yM & (maxH - 1)); + get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = 16*pix->pos_x + pix->x; + pix->pos_y = 16*pix->pos_y + pix->y; + } +} +
/*!
 ************************************************************************
 * \brief
 *    MBAFF luma neighbour lookup for the fixed location xN = -1, yN = 0,
 *    i.e. the sample to the left of the first row of the current
 *    macroblock.
 * \param currMB
 *    current macroblock
 * \param pix
 *    receives the result. mb_addr and available are always taken from the
 *    left neighbour (mb_addr_left / mb_avail_left); the position fields
 *    are filled in only when the neighbour is available or
 *    p_Vid->DeblockCall is non-zero.
 ************************************************************************
 */
void getAffNeighbourN0Luma(const Macroblock *currMB, PixelPos *pix)
{ // xN = -1 && yN == 0
  VideoParameters *p_Vid = currMB->p_Vid;
  const int cur_is_bottom = ((currMB->mbAddrX & 0x01) != 0);
  int yM = -1;   // stays -1 when the left neighbour is unavailable

  // every frame/field, top/bottom combination starts from mb_addr_left
  pix->mb_addr   = currMB->mb_addr_left;
  pix->available = currMB->mb_avail_left;

  if (currMB->mb_avail_left)
  {
    if (!cur_is_bottom)
    {
      // top MB (frame or field): row 0 of the MB at mb_addr_left
      yM = 0;
    }
    else if (!currMB->mb_field)
    {
      // bottom frame MB
      if (!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
      {
        (pix->mb_addr)++;
        yM = 0;
      }
      else
      {
        yM = 8;
      }
    }
    else
    {
      // bottom field MB
      if (!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
      {
        yM = 1;
      }
      else
      {
        (pix->mb_addr)++;
        yM = 0;
      }
    }
  }

  if (pix->available || p_Vid->DeblockCall)
  {
    pix->x = (short) 15;          // -1 & 15
    pix->y = (short) (yM & 15);
    get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
    pix->pos_x = 16 * pix->pos_x + pix->x;
    pix->pos_y = 16 * pix->pos_y + pix->y;
  }
}

+void getAffNeighbourX0(const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix) +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW, maxH; + int yM = -1; + + maxW = mb_size[0]; + maxH = mb_size[1]; + + // initialize to "not available" + pix->available = FALSE; + + if(0 > (maxH - 1)) + { + return; + } + if (xN > (maxW - 1) && 0 < maxH) + { + return; + } + + if (xN < 0) + { + if (0 <maxH) + { + if (!currMB->mb_field) + { + // frame + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + yM = 0; + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + (pix->mb_addr)++; + yM = 0; + } + else + { + yM = (0 + maxH) >> 1; + } + } + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (0 < (maxH >> 1)) + { + yM = 0; + } + else + { + (pix->mb_addr)++; + yM = (0) - maxH; + } + } + else + { + yM = 0; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_left; + pix->available = currMB->mb_avail_left; + if (currMB->mb_avail_left) + { + if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field) + { + if (0 < (maxH >> 1)) + { + yM = 1; + } + else + { + (pix->mb_addr)++; + yM = 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = 0; + } + } + } + } + } + + } + else + { // xN >= 0 + if (xN >= 0 && xN < maxW) + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (p_Vid->DeblockCall == 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available
= TRUE; + yM = 0 - 1; + } + + else if (0 <maxH) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = 0; + } + + } + } + if (pix->available || p_Vid->DeblockCall) + { + pix->x = (short) (xN & (maxW - 1)); + pix->y = (short) (yM & (maxH - 1)); + get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y)); + pix->pos_x = pix->pos_x + pix->x; + pix->pos_y = pix->pos_y + pix->y; + } +} +
/*!
 ************************************************************************
 * \brief
 *    MBAFF neighbour lookup for column xN == 0 and an arbitrary row yN.
 * \param currMB
 *    current macroblock
 * \param yN
 *    row of the requested sample relative to the current macroblock;
 *    negative values address the macroblock pair above
 * \param mb_size
 *    macroblock size in samples: mb_size[0] = width, mb_size[1] = height
 * \param pix
 *    receives the result. available stays FALSE when yN lies below the
 *    macroblock. The position fields are written only when the neighbour
 *    is available or p_Vid->DeblockCall is non-zero.
 ************************************************************************
 */
void getAffNeighbour0X(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN == 0, yN full range
{
  VideoParameters *p_Vid = currMB->p_Vid;
  const int width  = mb_size[0];
  const int height = mb_size[1];
  const int cur_is_top = ((currMB->mbAddrX & 0x01) == 0);
  int yM = -1;

  // initialize to "not available"
  pix->available = FALSE;

  // below the macroblock: nothing to report
  if (yN > (height - 1))
  {
    return;
  }
  // column 0 does not exist for a non-positive width (yN < height holds here)
  if ((width - 1) < 0 && yN >= 0)
  {
    return;
  }

  if (width > 0)
  {
    if (yN >= 0)
    {
      // inside the current macroblock (0 <= yN < height)
      if (yN == 0 && p_Vid->DeblockCall == 2)
      {
        // for the deblocker if this is the extra edge then do this special stuff
        pix->mb_addr   = currMB->mb_addr_up + 1;
        pix->available = TRUE;
        yM = yN - 1;
      }
      else
      {
        pix->mb_addr   = currMB->mbAddrX;
        pix->available = TRUE;
        yM = yN;
      }
    }
    else if (!currMB->mb_field)
    {
      // frame MB, row above
      if (cur_is_top)
      {
        pix->mb_addr = currMB->mb_addr_up;
        // for the deblocker if the current MB is a frame and the one above is a field
        // then the neighbor is the top MB of the pair
        if (currMB->mb_avail_up
            && !(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
        {
          pix->mb_addr += 1;
        }
        pix->available = currMB->mb_avail_up;
      }
      else
      {
        // bottom frame MB: the row above belongs to the top MB of the same pair
        pix->mb_addr   = currMB->mbAddrX - 1;
        pix->available = TRUE;
      }
      yM = yN;
    }
    else if (cur_is_top)
    {
      // top field MB, row above
      pix->mb_addr   = currMB->mb_addr_up;
      pix->available = currMB->mb_avail_up;
      if (currMB->mb_avail_up)
      {
        if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
        {
          (pix->mb_addr)++;
          yM = 2 * yN;
        }
        else
        {
          yM = yN;
        }
      }
    }
    else
    {
      // bottom field MB, row above
      pix->mb_addr   = currMB->mb_addr_up + 1;
      pix->available = currMB->mb_avail_up;
      yM = yN;
    }
  }
  else
  {
    // width <= 0, so yN < 0 here; the original labels this branch "xN >= maxW"
    if (!currMB->mb_field)
    {
      if (cur_is_top)
      {
        pix->mb_addr   = currMB->mb_addr_upper_right + 1;
        pix->available = currMB->mb_avail_upper_right;
        yM = yN;
      }
      else
      {
        pix->available = FALSE;
      }
    }
    else if (cur_is_top)
    {
      pix->mb_addr   = currMB->mb_addr_upper_right;
      pix->available = currMB->mb_avail_upper_right;
      if (currMB->mb_avail_upper_right)
      {
        if (!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field)
        {
          (pix->mb_addr)++;
          yM = 2 * yN;
        }
        else
        {
          yM = yN;
        }
      }
    }
    else
    {
      pix->mb_addr   = currMB->mb_addr_upper_right + 1;
      pix->available = currMB->mb_avail_upper_right;
      yM = yN;
    }
  }

  if (pix->available || p_Vid->DeblockCall)
  {
    pix->x = 0;
    pix->y = (short) (yM & (height - 1));
    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
    pix->pos_y = pix->pos_y + pix->y;
  }
}

+void getAffNeighbour0XLuma(const Macroblock *currMB, int yN, PixelPos *pix) // xN == 0, yN full range +{ + VideoParameters *p_Vid = currMB->p_Vid; + int maxW, maxH; + int yM = -1; + + maxW = 16; + maxH = 16; + + // initialize to "not available" + pix->available = FALSE; + + if(yN > 15) + { + return; + } + + if (yN<0) + { + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = yN; + }
+ else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = yN; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + if (currMB->mb_avail_up) + { + if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = currMB->mb_avail_up; + yM = yN; + } + } + } + else + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && p_Vid->DeblockCall == 2) + { + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = TRUE; + yM = yN - 1; + } + + else if ((yN >= 0) && (yN <maxH)) + { + pix->mb_addr = currMB->mbAddrX; + pix->available = TRUE; + yM = yN; + } + } + + if (pix->available || p_Vid->DeblockCall) + { + const int mb_size[2] = {16,16}; + pix->x = 0; + pix->y = (short) (yM & (maxH - 1)); + get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y)); + pix->pos_y = pix->pos_y + pix->y; + } +} + + +
/*!
 ************************************************************************
 * \brief
 *    MBAFF neighbour lookup for the fixed location xN == 0, yN == -1,
 *    i.e. the sample directly above the first column of the current
 *    macroblock.
 * \param currMB
 *    current macroblock
 * \param mb_size
 *    macroblock size in samples: mb_size[0] = width, mb_size[1] = height
 * \param pix
 *    receives the result. The position fields are written only when the
 *    neighbour is available or p_Vid->DeblockCall is non-zero.
 ************************************************************************
 */
void getAffNeighbour0N(const Macroblock *currMB, const int mb_size[2], PixelPos *pix) // xN == 0, yN = -1
{
  VideoParameters *p_Vid = currMB->p_Vid;
  const int width  = mb_size[0];
  const int height = mb_size[1];
  const int cur_is_top = ((currMB->mbAddrX & 0x01) == 0);
  // row -1 of the neighbour in every case except a top field MB below a
  // frame pair, where it becomes -2 (set below)
  int yM = -1;

  // initialize to "not available"
  pix->available = FALSE;

  if (width > 0)
  {
    if (!currMB->mb_field)
    {
      // frame MB
      if (cur_is_top)
      {
        pix->mb_addr = currMB->mb_addr_up;
        // for the deblocker if the current MB is a frame and the one above is a field
        // then the neighbor is the top MB of the pair
        if (currMB->mb_avail_up
            && !(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
        {
          pix->mb_addr += 1;
        }
        pix->available = currMB->mb_avail_up;
      }
      else
      {
        // bottom frame MB: the row above belongs to the top MB of the same pair
        pix->mb_addr   = currMB->mbAddrX - 1;
        pix->available = TRUE;
      }
    }
    else if (cur_is_top)
    {
      // top field MB
      pix->mb_addr   = currMB->mb_addr_up;
      pix->available = currMB->mb_avail_up;
      if (currMB->mb_avail_up && !p_Vid->mb_data[currMB->mb_addr_up].mb_field)
      {
        (pix->mb_addr)++;
        yM = -2;
      }
    }
    else
    {
      // bottom field MB
      pix->mb_addr   = currMB->mb_addr_up + 1;
      pix->available = currMB->mb_avail_up;
    }
  }
  else
  {
    // taken only when mb_size[0] is not positive; the original labels this
    // branch "xN >= maxW" and it consults the upper-right neighbour
    if (!currMB->mb_field)
    {
      if (cur_is_top)
      {
        pix->mb_addr   = currMB->mb_addr_upper_right + 1;
        pix->available = currMB->mb_avail_upper_right;
      }
      else
      {
        pix->available = FALSE;
      }
    }
    else if (cur_is_top)
    {
      pix->mb_addr   = currMB->mb_addr_upper_right;
      pix->available = currMB->mb_avail_upper_right;
      if (currMB->mb_avail_upper_right && !p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field)
      {
        (pix->mb_addr)++;
        yM = -2;
      }
    }
    else
    {
      pix->mb_addr   = currMB->mb_addr_upper_right + 1;
      pix->available = currMB->mb_avail_upper_right;
    }
  }

  if (pix->available || p_Vid->DeblockCall)
  {
    pix->x = 0;
    pix->y = (short) (yM & (height - 1));
    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
    pix->pos_y = pix->pos_y + pix->y;
  }
}

+void getAffNeighbour0NLuma(const Macroblock *currMB, PixelPos *pix) // xN == 0, yN = -1 +{ + VideoParameters *p_Vid = currMB->p_Vid; + const int maxW=16, maxH=16; + int yM = -1; + + + // initialize to "not available" + pix->available = FALSE; + + if (!currMB->mb_field) + { + //frame + if ((currMB->mbAddrX & 0x01) == 0) + { + //top + pix->mb_addr = currMB->mb_addr_up; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMB->mb_avail_up) + { + if
(!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMB->mb_avail_up; + yM = -1; + } + else + { + // bottom + pix->mb_addr = currMB->mbAddrX - 1; + pix->available = TRUE; + yM = -1; + } + } + else + { + // field + if ((currMB->mbAddrX & 0x01) == 0) + { + // top + pix->mb_addr = currMB->mb_addr_up; + pix->available = currMB->mb_avail_up; + if (currMB->mb_avail_up) + { + if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field) + { + (pix->mb_addr)++; + yM = -2; + } + else + { + yM = -1; + } + } + } + else + { + // bottom + pix->mb_addr = currMB->mb_addr_up + 1; + pix->available = currMB->mb_avail_up; + yM = -1; + } + } + + if (pix->available || p_Vid->DeblockCall) + { + const int mb_size[2] = {16,16}; + pix->x = 0; + pix->y = (short) (yM & (maxH - 1)); + get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y)); + pix->pos_y = pix->pos_y + pix->y; + } +} + +
/*!
 ************************************************************************
 * \brief
 *    Locate a neighbouring position through p_Vid->getNeighbour and
 *    convert the result from sample units to 4x4-block units.
 * \param currMB
 *    current macroblock
 * \param block_x
 *    x position, forwarded unchanged to getNeighbour
 * \param block_y
 *    y position, forwarded unchanged to getNeighbour
 * \param mb_size
 *    macroblock size in samples (luma or chroma), forwarded unchanged
 * \param pix
 *    receives the neighbour; when it is available, x, y, pos_x and pos_y
 *    are each shifted right by two (divided by four)
 ************************************************************************
 */
void get4x4Neighbour(const Macroblock *currMB, int block_x, int block_y, const int mb_size[2], PixelPos *pix)
{
  currMB->p_Vid->getNeighbour(currMB, block_x, block_y, mb_size, pix);

  // leave an unavailable neighbour exactly as getNeighbour reported it
  if (!pix->available)
  {
    return;
  }

  pix->x     >>= 2;
  pix->pos_x >>= 2;
  pix->y     >>= 2;
  pix->pos_y >>= 2;
}

/*!
 ************************************************************************
 * \brief
 *    Luma-only variant of get4x4Neighbour: locates the neighbour through
 *    p_Vid->getNeighbourLuma (no mb_size argument) and converts the
 *    result from sample units to 4x4-block units.
 * \param currMB
 *    current macroblock
 * \param block_x
 *    x position, forwarded unchanged to getNeighbourLuma
 * \param block_y
 *    y position, forwarded unchanged to getNeighbourLuma
 * \param pix
 *    receives the neighbour; when it is available, x, y, pos_x and pos_y
 *    are each shifted right by two (divided by four)
 ************************************************************************
 */
void get4x4NeighbourLuma(const Macroblock *currMB, int block_x, int block_y, PixelPos *pix)
{
  currMB->p_Vid->getNeighbourLuma(currMB, block_x, block_y, pix);

  // leave an unavailable neighbour exactly as getNeighbourLuma reported it
  if (!pix->available)
  {
    return;
  }

  pix->x     >>= 2;
  pix->pos_x >>= 2;
  pix->y     >>= 2;
  pix->pos_y >>= 2;
}
diff --git
a/Src/h264dec/ldecod/src/mb_prediction.c b/Src/h264dec/ldecod/src/mb_prediction.c new file mode 100644 index 00000000..799236a8 --- /dev/null +++ b/Src/h264dec/ldecod/src/mb_prediction.c @@ -0,0 +1,979 @@ +/*! +************************************************************************************* +* \file mb_prediction.c +* +* \brief +* Macroblock prediction functions +* +* \author +* Main contributors (see contributors.h for copyright, address and affiliation details) +* - Alexis Michael Tourapis <alexismt@ieee.org> +************************************************************************************* +*/ + +#include "contributors.h" + +#include "block.h" +#include "global.h" +#include "mbuffer.h" +#include "elements.h" +#include "errorconcealment.h" +#include "macroblock.h" +#include "fmo.h" +#include "cabac.h" +#include "vlc.h" +#include "image.h" +#include "mb_access.h" +#include "biaridecod.h" +#include "transform8x8.h" +#include "transform.h" +#include "mc_prediction.h" +#include "quant.h" +#include "intra4x4_pred.h" +#include "intra8x8_pred.h" +#include "intra16x16_pred.h" +#include "mv_prediction.h" +#include "mb_prediction.h" +#include "optim.h" + + +int mb_pred_intra4x4(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + Slice *currSlice = currMB->p_Slice; + int yuv = dec_picture->chroma_format_idc - 1; + + if (currMB->is_lossless == FALSE) + { + const h264_short_block_t *blocks = currSlice->cof4[curr_plane]; + const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[curr_plane]; + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[curr_plane]; + int block_x = currMB->block_x; + int block_y = currMB->block_y; + if (intrapred(currMB, curr_plane, 0,0,block_x + 0,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 0, 0); + if (intrapred(currMB, 
curr_plane, 4,0,block_x + 1,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 4, 0); + if (intrapred(currMB, curr_plane, 0,4,block_x + 0,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 0, 4); + if (intrapred(currMB, curr_plane, 4,4,block_x + 1,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 4, 4); + if (intrapred(currMB, curr_plane, 8,0,block_x + 2,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[4], mb_pred, mb_rec, 8, 0); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 8, 0); + if (intrapred(currMB, curr_plane, 12,0,block_x + 3,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[5], mb_pred, mb_rec, 12, 0); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 12, 0); + if (intrapred(currMB, curr_plane, 8,4,block_x + 2,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[6], mb_pred, mb_rec, 8, 4); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 8, 4); + if (intrapred(currMB, curr_plane, 12,4,block_x + 3,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[7], mb_pred, mb_rec, 12, 4); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 12, 4); + if (intrapred(currMB, curr_plane, 0,8,block_x + 0,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8); + 
copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 0, 8); + if (intrapred(currMB, curr_plane, 4,8,block_x + 1,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 4, 8); + if (intrapred(currMB, curr_plane, 0,12,block_x + 0,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12); + copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 0, 12); + if (intrapred(currMB, curr_plane, 4,12,block_x + 1,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 4, 12); + if (intrapred(currMB, curr_plane, 8,8,block_x + 2,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[12], mb_pred, mb_rec, 8, 8); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 8, 8); + if (intrapred(currMB, curr_plane, 12,8,block_x + 3,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[13], mb_pred, mb_rec, 12, 8); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 12, 8); + if (intrapred(currMB, curr_plane, 8,12,block_x + 2,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[14], mb_pred, mb_rec, 8, 12); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 8, 12); + if (intrapred(currMB, curr_plane, 12,12,block_x + 3,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + opt_itrans4x4(blocks[15], mb_pred, mb_rec, 12, 12); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 12, 12); + // benski> prediction might 
reference other parts of the image reconstructed during this block, so can't just do a single 16x16 image copy + } + else + { // lossless + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[curr_plane]; + int block_x = currMB->block_x; + int block_y = currMB->block_y; + + if (intrapred(currMB, curr_plane, 0,0,block_x + 0,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 0, 0); + copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 0, 0); + if (intrapred(currMB, curr_plane, 4,0,block_x + 1,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 4, 0); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 4, 0); + if (intrapred(currMB, curr_plane, 0,4,block_x + 0,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 0, 4); + copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 0, 4); + if (intrapred(currMB, curr_plane, 4,4,block_x + 1,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 4, 4); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 4, 4); + if (intrapred(currMB, curr_plane, 8,0,block_x + 2,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 8, 0); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 8, 0); + if (intrapred(currMB, curr_plane, 12,0,block_x + 3,block_y + 0) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 12, 0); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 0)<<2, currSlice->mb_rec[curr_plane], 12, 0); + if (intrapred(currMB, curr_plane, 8,4,block_x + 2,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, 
curr_plane, 8, 4); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 8, 4); + if (intrapred(currMB, curr_plane, 12,4,block_x + 3,block_y + 1) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 12, 4); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 1)<<2, currSlice->mb_rec[curr_plane], 12, 4); + if (intrapred(currMB, curr_plane, 0,8,block_x + 0,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 0, 8); + copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 0, 8); + if (intrapred(currMB, curr_plane, 4,8,block_x + 1,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 4, 8); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 4, 8); + if (intrapred(currMB, curr_plane, 0,12,block_x + 0,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 0, 12); + copy_image_data_4x4_stride(image, (block_x + 0)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 0, 12); + if (intrapred(currMB, curr_plane, 4,12,block_x + 1,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 4, 12); + copy_image_data_4x4_stride(image, (block_x + 1)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 4, 12); + if (intrapred(currMB, curr_plane, 8,8,block_x + 2,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 8, 8); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 8, 8); + if (intrapred(currMB, curr_plane, 12,8,block_x + 3,block_y + 2) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 12, 8); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 2)<<2, currSlice->mb_rec[curr_plane], 12, 8); + if 
(intrapred(currMB, curr_plane, 8,12,block_x + 2,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 8, 12); + copy_image_data_4x4_stride(image, (block_x + 2)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 8, 12); + if (intrapred(currMB, curr_plane, 12,12,block_x + 3,block_y + 3) == SEARCH_SYNC) return SEARCH_SYNC; + Inv_Residual_trans_4x4(currMB, curr_plane, 12, 12); + copy_image_data_4x4_stride(image, (block_x + 3)<<2, (block_y + 3)<<2, currSlice->mb_rec[curr_plane], 12, 12); + // benski> prediction might reference other parts of the image reconstructed during this block, so can't just do a single 16x16 image copy + } + + // chroma decoding ******************************************************* + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444)) + { + intra_cr_decoding(currMB, yuv); + } + + return 1; +} + +
/*!
 ************************************************************************
 * \brief
 *    Decode an Intra 16x16 macroblock for one colour plane: run the
 *    16x16 intra prediction selected by currMB->i16mode, apply the
 *    macroblock-level 4x4 inverse transform, then decode chroma unless
 *    the picture is YUV400 or YUV444.
 * \param currMB
 *    current macroblock; ipmode_DPCM is overwritten with i16mode
 * \param curr_plane
 *    colour plane being reconstructed
 * \param image
 *    not referenced by this function
 * \param dec_picture
 *    picture being decoded; only chroma_format_idc is read here
 * \return
 *    always 1
 ************************************************************************
 */
int mb_pred_intra16x16(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
{
  const int yuv = dec_picture->chroma_format_idc - 1;

  // prediction, then the macroblock-level 4x4 inverse transform
  intrapred16x16(currMB, curr_plane, currMB->i16mode);
  currMB->ipmode_DPCM = (char) currMB->i16mode; // For residual DPCM
  iMBtrans4x4(currMB, curr_plane, 0);

  // chroma is decoded here for every format except YUV400 and YUV444
  if (!(dec_picture->chroma_format_idc == YUV400 || dec_picture->chroma_format_idc == YUV444))
  {
    intra_cr_decoding(currMB, yuv);
  }

  return 1;
}

+int mb_pred_intra8x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + Slice *currSlice = currMB->p_Slice; + int yuv = dec_picture->chroma_format_idc - 1; + + if (currMB->is_lossless) + { + //PREDICTION + intrapred8x8(currMB, curr_plane, 0, 0); + Inv_Residual_trans_8x8(currMB, curr_plane, 0,0); // use DCT transform and make 8x8 block m7 from prediction block mpr +
copy_image_data_8x8_stride2(image, currMB->pix_x + 0 ,currMB->pix_y + 0, currSlice->mb_rec[curr_plane], 0, 0); + + intrapred8x8(currMB, curr_plane, 8, 0); + Inv_Residual_trans_8x8(currMB, curr_plane, 8,0); // use DCT transform and make 8x8 block m7 from prediction block mpr + copy_image_data_8x8_stride2(image, currMB->pix_x + 8 ,currMB->pix_y + 0, currSlice->mb_rec[curr_plane], 8, 0); + + intrapred8x8(currMB, curr_plane, 0, 8); + Inv_Residual_trans_8x8(currMB, curr_plane, 0,8); // use DCT transform and make 8x8 block m7 from prediction block mpr + copy_image_data_8x8_stride2(image, currMB->pix_x + 0 ,currMB->pix_y + 8, currSlice->mb_rec[curr_plane], 0, 8); + + intrapred8x8(currMB, curr_plane, 8, 8); + Inv_Residual_trans_8x8 (currMB, curr_plane, 8,8); // use DCT transform and make 8x8 block m7 from prediction block mpr + copy_image_data_8x8_stride2(image, currMB->pix_x + 8 ,currMB->pix_y + 8, currSlice->mb_rec[curr_plane], 8, 8); + } + else + { + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[curr_plane]; + h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[curr_plane]; + h264_short_8x8block_t *mb_rres8 = currSlice->mb_rres8[curr_plane]; + + //PREDICTION + intrapred8x8(currMB, curr_plane, 0, 0); + opt_itrans8x8(mb_rec, mb_pred, mb_rres8[0], 0); // use DCT transform and make 8x8 block m7 from prediction block mpr + copy_image_data_8x8_stride2(image, currMB->pix_x + 0 ,currMB->pix_y + 0, currSlice->mb_rec[curr_plane], 0, 0); + + intrapred8x8(currMB, curr_plane, 8, 0); + opt_itrans8x8(mb_rec, mb_pred, mb_rres8[1], 8); // use DCT transform and make 8x8 block m7 from prediction block mpr + copy_image_data_8x8_stride2(image, currMB->pix_x + 8 ,currMB->pix_y + 0, currSlice->mb_rec[curr_plane], 8, 0); + + intrapred8x8(currMB, curr_plane, 0, 8); + opt_itrans8x8(mb_rec+8, mb_pred+8, mb_rres8[2], 0); // use DCT transform and make 8x8 block m7 from prediction block mpr + copy_image_data_8x8_stride2(image, currMB->pix_x + 0 ,currMB->pix_y + 8, 
currSlice->mb_rec[curr_plane], 0, 8); + + intrapred8x8(currMB, curr_plane, 8, 8); + opt_itrans8x8(mb_rec+8, mb_pred+8, mb_rres8[3], 8); // use DCT transform and make 8x8 block m7 from prediction block mpr + copy_image_data_8x8_stride2(image, currMB->pix_x + 8 ,currMB->pix_y + 8, currSlice->mb_rec[curr_plane], 8, 8); + } + + // chroma decoding ******************************************************* + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444)) + { + intra_cr_decoding(currMB, yuv); + } + return 1; +} + + +static void set_chroma_vector(Macroblock *currMB, int *list_offset) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + if (!currSlice->mb_aff_frame_flag) + { + if(p_Vid->structure == TOP_FIELD) + { + int k,l; + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + if(p_Vid->structure != p_Vid->listX[l][k]->structure) + p_Vid->listX[l][k]->chroma_vector_adjustment = -2; + else + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + else if(p_Vid->structure == BOTTOM_FIELD) + { + int k,l; + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + if (p_Vid->structure != p_Vid->listX[l][k]->structure) + p_Vid->listX[l][k]->chroma_vector_adjustment = 2; + else + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + else + { + int k,l; + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + } + else + { + int mb_nr = (currMB->mbAddrX & 0x01); + int k,l; + + ////////////////////////// + // find out the correct list offsets + if (currMB->mb_field) + { + *list_offset = mb_nr ? 
4 : 2; + + for (l = LIST_0 + *list_offset; l <= (LIST_1 + *list_offset); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + if(mb_nr == 0 && p_Vid->listX[l][k]->structure == BOTTOM_FIELD) + p_Vid->listX[l][k]->chroma_vector_adjustment = -2; + else if(mb_nr == 1 && p_Vid->listX[l][k]->structure == TOP_FIELD) + p_Vid->listX[l][k]->chroma_vector_adjustment = 2; + else + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + else + { + for (l = LIST_0; l <= (LIST_1); l++) + { + for(k = 0; k < p_Vid->listXsize[l]; k++) + { + p_Vid->listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + } + + p_Vid->max_mb_vmv_r = (p_Vid->structure != FRAME || (currSlice->mb_aff_frame_flag && currMB->mb_field)) ? p_Vid->max_vmv_r >> 1 : p_Vid->max_vmv_r; +} +
/*!
 ************************************************************************
 * \brief
 *    Reconstruct a skipped macroblock: run 16x16 motion compensation
 *    from LIST_0 and copy the prediction straight into the picture
 *    (no residual is applied).
 * \param currMB
 *    current macroblock
 * \param curr_plane
 *    colour plane whose prediction is copied into image
 * \param image
 *    destination plane for the 16x16 prediction
 * \param dec_picture
 *    picture being decoded; for YUV420 and YUV422 its imgUV[0] and
 *    imgUV[1] receive currSlice->mb_pred[1] and mb_pred[2] (presumably
 *    filled by perform_mc16x16, which is not visible here). Other chroma
 *    formats get no chroma copy.
 ************************************************************************
 */
void mb_pred_skip(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
{
  Slice *currSlice = currMB->p_Slice;
  int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field));
  int list_offset = 0;
  int uv;

  set_chroma_vector(currMB, &list_offset);

  perform_mc16x16(currMB, curr_plane, dec_picture, LIST_0, list_offset, curr_mb_field);

  opt_copy_image_data_16x16_stride(image, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[curr_plane]);

  if (dec_picture->chroma_format_idc == YUV420)
  {
    // 8x8 chroma blocks; mb_pred[1] -> imgUV[0], mb_pred[2] -> imgUV[1]
    for (uv = 0; uv < 2; uv++)
    {
      copy_image_data_8x8_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[uv + 1]);
    }
  }
  else if (dec_picture->chroma_format_idc == YUV422)
  {
    // 8 wide by 16 high chroma blocks
    for (uv = 0; uv < 2; uv++)
    {
      copy_image_data_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[uv + 1], 8, 16);
    }
  }
}

+void mb_pred_sp_skip(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + Slice
*currSlice = currMB->p_Slice; + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + int list_offset = 0; + + set_chroma_vector(currMB, &list_offset); + + perform_mc16x16(currMB, curr_plane, dec_picture, LIST_0, list_offset, curr_mb_field); + iTransform(currMB, curr_plane, 1); +} + +void mb_pred_p_inter8x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + int block8x8; // needed for ABT + int i=0, j=0,k; + + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int smb = p_Vid->type == SP_SLICE && IS_INTER(currMB); + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + + int list_offset = 0; + + set_chroma_vector(currMB, &list_offset); + + for (block8x8=0; block8x8<4; block8x8++) + { + int mv_mode = currMB->b8mode[block8x8]; + int pred_dir = currMB->b8pdir[block8x8]; + if (mv_mode == SMB8x8) + { + i = (decode_block_scan[block8x8*4] & 3); + j = block8x8 & ~1; + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field); + } + else if (mv_mode == SMB4x4) + { + int k_start = (block8x8 << 2); + int k_inc = (mv_mode == SMB8x4) ? 2 : 1; + int k_end = (mv_mode == SMB8x8) ? k_start + 1 : ((mv_mode == SMB4x4) ? k_start + 4 : k_start + k_inc + 1); + + int block_size_x = (mv_mode == SMB8x4) ? SMB_BLOCK_SIZE : BLOCK_SIZE; + int block_size_y = (mv_mode == SMB4x8) ? SMB_BLOCK_SIZE : BLOCK_SIZE; + + for (k = k_start; k < k_end; k += k_inc) + { + i = (decode_block_scan[k] & 3); + j = ((decode_block_scan[k] >> 2) & 3); + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, block_size_x, block_size_y, curr_mb_field); + } + } + else + { + int k_start = (block8x8 << 2); + int k_inc = (mv_mode == SMB8x4) ? 2 : 1; + int k_end = k_start + k_inc + 1; + + int block_size_x = (mv_mode == SMB8x4) ? SMB_BLOCK_SIZE : BLOCK_SIZE; + int block_size_y = (mv_mode == SMB4x8) ? 
SMB_BLOCK_SIZE : BLOCK_SIZE; + + for (k = k_start; k < k_end; k += k_inc) + { + i = (decode_block_scan[k] & 3); + j = ((decode_block_scan[k] >> 2) & 3); + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, block_size_x, block_size_y, curr_mb_field); + } + } + /* generic: + int k_start = (block8x8 << 2); + int k_inc = (mv_mode == SMB8x4) ? 2 : 1; + int k_end = (mv_mode == SMB8x8) ? k_start + 1 : ((mv_mode == SMB4x4) ? k_start + 4 : k_start + k_inc + 1); + + int block_size_x = ( mv_mode == SMB8x4 || mv_mode == SMB8x8 ) ? SMB_BLOCK_SIZE : BLOCK_SIZE; + int block_size_y = ( mv_mode == SMB4x8 || mv_mode == SMB8x8 ) ? SMB_BLOCK_SIZE : BLOCK_SIZE; + + for (k = k_start; k < k_end; k += k_inc) + { + i = (decode_block_scan[k] & 3); + j = ((decode_block_scan[k] >> 2) & 3); + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, block_size_x, block_size_y, curr_mb_field); + } + */ + } + + iTransform(currMB, curr_plane, smb); +} + +void mb_pred_p_inter16x16(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + int smb = (currMB->p_Vid->type == SP_SLICE); + Slice *currSlice = currMB->p_Slice; + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + int list_offset = 0; + + set_chroma_vector(currMB, &list_offset); + + perform_mc16x16(currMB, curr_plane, dec_picture, currMB->b8pdir[0], list_offset, curr_mb_field); + iTransform(currMB, curr_plane, smb); +} + +void mb_pred_p_inter16x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + int smb = (currMB->p_Vid->type == SP_SLICE); + Slice *currSlice = currMB->p_Slice; + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + int list_offset = 0; + + set_chroma_vector(currMB, &list_offset); + + perform_mc16x8(currMB, curr_plane, dec_picture, currMB->b8pdir[0], 0, 0, list_offset, curr_mb_field); + perform_mc16x8(currMB, curr_plane, dec_picture, currMB->b8pdir[2], 0, 2, 
list_offset, curr_mb_field); + iTransform(currMB, curr_plane, smb); +} + +void mb_pred_p_inter8x16(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + int smb = (currMB->p_Vid->type == SP_SLICE); + Slice *currSlice = currMB->p_Slice; + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + int list_offset = 0; + + set_chroma_vector(currMB, &list_offset); + + perform_mc8x16(currMB, curr_plane, dec_picture, currMB->b8pdir[0], 0, 0, list_offset, curr_mb_field); + perform_mc8x16(currMB, curr_plane, dec_picture, currMB->b8pdir[1], 2, 0, list_offset, curr_mb_field); + iTransform(currMB, curr_plane, smb); +} + +void mb_pred_b_dtemporal(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + short ref_idx; + int refList; + + PicMotionParams *motion = &dec_picture->motion; + int k; + int block8x8; // needed for ABT + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + + MotionParams *colocated = &currSlice->p_colocated->frame; + int list_offset = 0; + + set_chroma_vector(currMB, &list_offset); + + if (currMB->mb_field) + { + if(currMB->mbAddrX & 0x01) + { + colocated = &currSlice->p_colocated->bottom; + } + else + { + colocated = &currSlice->p_colocated->top; + } + } + + for (block8x8=0; block8x8<4; block8x8++) + { + int pred_dir = currMB->b8pdir[block8x8]; + + int k_start = (block8x8 << 2); + int k_end = k_start; + + if (p_Vid->active_sps->direct_8x8_inference_flag) + { + k_end ++; + } + else + { + k_end += BLOCK_MULTIPLE; + } + + for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++) + { + + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int j6 = currMB->block_y_aff + j; + assert (pred_dir<=2); + + refList = (colocated->motion[LIST_0][j6][i4].ref_idx== -1 ? 
LIST_1 : LIST_0); + ref_idx = colocated->motion[refList][j6][i4].ref_idx; + + if(ref_idx==-1) // co-located is intra mode + { + memset( &motion->motion[LIST_0][j4][i4].mv, 0, sizeof(MotionVector)); + memset( &motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector)); + + motion->motion[LIST_0][j4][i4].ref_idx = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + else // co-located skip or inter mode + { + int mapped_idx=0; + int iref; + + for (iref=0;iref<imin(currSlice->num_ref_idx_l0_active,p_Vid->listXsize[LIST_0 + list_offset]);iref++) + { + if(p_Vid->structure==0 && curr_mb_field==0) + { + // If the current MB is a frame MB and the colocated is from a field picture, + // then the colocated->ref_pic_id may have been generated from the wrong value of + // frame_poc if it references it's complementary field, so test both POC values + if(p_Vid->listX[0][iref]->top_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id || p_Vid->listX[0][iref]->bottom_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + continue; + } + + if (dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][iref]==colocated->motion[refList][j6][i4].ref_pic_id) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + { + mapped_idx=INVALIDINDEX; + } + } + if (INVALIDINDEX == mapped_idx) + { + error("temporal direct error: colocated block has ref that is unavailable",-1111); + } + else + { + int mv_scale = currSlice->mvscale[LIST_0 + list_offset][mapped_idx]; + + //! In such case, an array is needed for each different reference. 
+ if (mv_scale == 9999 || p_Vid->listX[LIST_0+list_offset][mapped_idx]->is_long_term) + { + memcpy(&motion->motion[LIST_0][j4][i4].mv, &colocated->motion[refList][j6][i4].mv, sizeof(MotionVector)); + memset(&motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector)); + } + else + { + motion->motion[LIST_0][j4][i4].mv[0]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[0] + 128 ) >> 8); + motion->motion[LIST_0][j4][i4].mv[1]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[1] + 128 ) >> 8); + + motion->motion[LIST_1][j4][i4].mv[0]= (short) (motion->motion[LIST_0][j4][i4].mv[0] - colocated->motion[refList][j6][i4].mv[0]); + motion->motion[LIST_1][j4][i4].mv[1]= (short) (motion->motion[LIST_0][j4][i4].mv[1] - colocated->motion[refList][j6][i4].mv[1]); + } + + motion->motion[LIST_0][j4][i4].ref_idx = (char) mapped_idx; //p_Vid->listX[1][0]->ref_idx[refList][j4][i4]; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + } + // store reference picture ID determined by direct mode + motion->motion[LIST_0][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][(short)motion->motion[LIST_0][j4][i4].ref_idx]; + motion->motion[LIST_1][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset][(short)motion->motion[LIST_1][j4][i4].ref_idx]; + } + for (k = k_start; k < k_end; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + if (p_Vid->active_sps->direct_8x8_inference_flag) + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field); + else + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, 4, 4, curr_mb_field); + } + } + + if (currMB->cbp == 0) + { + opt_copy_image_data_16x16_stride(image, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[curr_plane]); + + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444)) + { + 
copy_image_data_stride(dec_picture->imgUV[0], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[1], p_Vid->mb_size[IS_CHROMA][0], p_Vid->mb_size[IS_CHROMA][1]); + copy_image_data_stride(dec_picture->imgUV[1], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[2], p_Vid->mb_size[IS_CHROMA][0], p_Vid->mb_size[IS_CHROMA][1]); + } + } + else + iTransform(currMB, curr_plane, 0); +} + + +void mb_pred_b_inter8x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture) +{ + short ref_idx; + int refList; + + char l0_rFrame = -1, l1_rFrame = -1; + PicMotionParams *motion = &dec_picture->motion; + short pmvl0[2]={0,0}, pmvl1[2]={0,0}; + int block_size_x, block_size_y; + int k; + int block8x8; // needed for ABT + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field)); + + MotionParams *colocated = &currSlice->p_colocated->frame; + int list_offset = 0; + + set_chroma_vector(currMB, &list_offset); + + if (currMB->mb_field) + { + if(currMB->mbAddrX & 0x01) + { + colocated = &currSlice->p_colocated->bottom; + } + else + { + colocated = &currSlice->p_colocated->top; + } + } + + // prepare direct modes + if (currSlice->direct_spatial_mv_pred_flag && (!(currMB->b8mode[0] && currMB->b8mode[1] && currMB->b8mode[2] && currMB->b8mode[3]))) + prepare_direct_params(currMB, dec_picture, pmvl0, pmvl1, &l0_rFrame, &l1_rFrame); + + for (block8x8=0; block8x8<4; block8x8++) + { + int mv_mode = currMB->b8mode[block8x8]; + int pred_dir = currMB->b8pdir[block8x8]; + + if ( mv_mode == SMB8x8) + { + int i = (decode_block_scan[block8x8*4] & 3); + int j = ((decode_block_scan[block8x8*4] >> 2) & 3); + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field); + } + else if ( mv_mode == SMB4x4) + { + int k_start = (block8x8 << 2); + + for (k = k_start; k < k_start + 4; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = 
((decode_block_scan[k] >> 2) & 3); + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, BLOCK_SIZE, BLOCK_SIZE, curr_mb_field); + } + } + else if ( mv_mode != BSKIP_DIRECT) + { + int k_start = (block8x8 << 2); + int k_inc = (mv_mode == SMB8x4) ? 2 : 1; + int k_end = (k_start + k_inc + 1); + + block_size_x = ( mv_mode == SMB8x4) ? SMB_BLOCK_SIZE : BLOCK_SIZE; + block_size_y = ( mv_mode == SMB4x8) ? SMB_BLOCK_SIZE : BLOCK_SIZE; + + for (k = k_start; k < k_end; k += k_inc) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, block_size_x, block_size_y, curr_mb_field); + } + } + else + { + int k_start = (block8x8 << 2); + + // Prepare mvs (needed for deblocking and mv prediction + if (currSlice->direct_spatial_mv_pred_flag) + { + h264_ref_t *ref_pic_num_l0 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset]; + h264_ref_t *ref_pic_num_l1 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset]; + + for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int j6 = currMB->block_y_aff + j; + + assert (pred_dir<=2); + //===== DIRECT PREDICTION ===== + + if (l0_rFrame >=0) + { + if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + motion->motion[LIST_0][j4][i4].ref_idx = 0; + } + else + { + motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0]; + motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1]; + motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame; + } + } + else + { + motion->motion[LIST_0][j4][i4].ref_idx = -1; + motion->motion[LIST_0][j4][i4].mv[0] = 0; + motion->motion[LIST_0][j4][i4].mv[1] = 0; + } + + if 
(l1_rFrame >=0) + { + if (l1_rFrame==0 && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term))) + { + motion->motion[LIST_1][j4][i4].mv[0] = 0; + motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame; + } + else + { + motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0]; + motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1]; + motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame; + } + } + else + { + motion->motion[LIST_1][j4][i4].mv[0] = 0; + motion->motion[LIST_1][j4][i4].mv[1] = 0; + motion->motion[LIST_1][j4][i4].ref_idx = -1; + } + + if (l0_rFrame < 0 && l1_rFrame < 0) + { + motion->motion[LIST_0][j4][i4].ref_idx = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + + if (motion->motion[LIST_1][j4][i4].ref_idx==-1) + { + pred_dir = 0; + ref_idx = (motion->motion[LIST_0][j4][i4].ref_idx != -1) ? motion->motion[LIST_0][j4][i4].ref_idx : 0; + } + else if (motion->motion[LIST_0][j4][i4].ref_idx==-1) + { + pred_dir = 1; + ref_idx = (motion->motion[LIST_1][j4][i4].ref_idx != -1) ? motion->motion[LIST_1][j4][i4].ref_idx : 0; + } + else + pred_dir = 2; + + motion->motion[LIST_0][j4][i4].ref_pic_id = ref_pic_num_l0[(short)motion->motion[LIST_0][j4][i4].ref_idx]; + motion->motion[LIST_1][j4][i4].ref_pic_id = ref_pic_num_l1[(short)motion->motion[LIST_1][j4][i4].ref_idx]; + } + } + else + { + for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int j6 = currMB->block_y_aff + j; + + assert (pred_dir<=2); + + refList = (colocated->motion[LIST_0][j6][i4].ref_idx== -1 ? 
LIST_1 : LIST_0); + ref_idx = colocated->motion[refList][j6][i4].ref_idx; + + if(ref_idx==-1) // co-located is intra mode + { + memset( &motion->motion[LIST_0][j4][i4].mv, 0, sizeof(MotionVector)); + memset( &motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector)); + + motion->motion[LIST_0][j4][i4].ref_idx = 0; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + else // co-located skip or inter mode + { + int mapped_idx=0; + int iref; + + for (iref=0;iref<imin(currSlice->num_ref_idx_l0_active,p_Vid->listXsize[LIST_0 + list_offset]);iref++) + { + if(p_Vid->structure==0 && curr_mb_field==0) + { + // If the current MB is a frame MB and the colocated is from a field picture, + // then the colocated->ref_pic_id may have been generated from the wrong value of + // frame_poc if it references it's complementary field, so test both POC values + if(p_Vid->listX[0][iref]->top_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id || p_Vid->listX[0][iref]->bottom_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + continue; + } + + if (dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][iref]==colocated->motion[refList][j6][i4].ref_pic_id) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + { + mapped_idx=INVALIDINDEX; + } + } + if (INVALIDINDEX == mapped_idx) + { + error("temporal direct error: colocated block has ref that is unavailable",-1111); + } + else + { + int mv_scale = currSlice->mvscale[LIST_0 + list_offset][mapped_idx]; + + //! In such case, an array is needed for each different reference. 
+ if (mv_scale == 9999 || p_Vid->listX[LIST_0+list_offset][mapped_idx]->is_long_term) + { + memcpy(&motion->motion[LIST_0][j4][i4].mv, &colocated->motion[refList][j6][i4].mv, sizeof(MotionVector)); + memset(&motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector)); + } + else + { + motion->motion[LIST_0][j4][i4].mv[0]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[0] + 128 ) >> 8); + motion->motion[LIST_0][j4][i4].mv[1]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[1] + 128 ) >> 8); + + motion->motion[LIST_1][j4][i4].mv[0]= (short) (motion->motion[LIST_0][j4][i4].mv[0] - colocated->motion[refList][j6][i4].mv[0]); + motion->motion[LIST_1][j4][i4].mv[1]= (short) (motion->motion[LIST_0][j4][i4].mv[1] - colocated->motion[refList][j6][i4].mv[1]); + } + + motion->motion[LIST_0][j4][i4].ref_idx = (char) mapped_idx; //p_Vid->listX[1][0]->ref_idx[refList][j4][i4]; + motion->motion[LIST_1][j4][i4].ref_idx = 0; + } + } + // store reference picture ID determined by direct mode + motion->motion[LIST_0][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][(short)motion->motion[LIST_0][j4][i4].ref_idx]; + motion->motion[LIST_1][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset][(short)motion->motion[LIST_1][j4][i4].ref_idx]; + } + } + + if (p_Vid->active_sps->direct_8x8_inference_flag) + { + int i = (decode_block_scan[k_start] & 3); + int j = ((decode_block_scan[k_start] >> 2) & 3); + perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field); + } + else + { + for (k = k_start; k < k_start+BLOCK_MULTIPLE; k ++) + { + int i = (decode_block_scan[k] & 3); + int j = ((decode_block_scan[k] >> 2) & 3); + perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, BLOCK_SIZE, BLOCK_SIZE, curr_mb_field); + } + } + } + } + + iTransform(currMB, curr_plane, 0); +} + +/*! 
+************************************************************************ +* \brief +* Copy IPCM coefficients to decoded picture buffer and set parameters for this MB +* (for IPCM CABAC and IPCM CAVLC 28/11/2003) +* +* \author +* Dong Wang <Dong.Wang@bristol.ac.uk> +************************************************************************ +*/ +void set_chroma_qp(Macroblock* currMB); +static inline void update_qp(Macroblock *currMB, int qp) +{ + VideoParameters *p_Vid = currMB->p_Vid; + currMB->qp = qp; + currMB->qp_scaled[0] = qp + p_Vid->bitdepth_luma_qp_scale; + set_chroma_qp(currMB); + currMB->is_lossless = (Boolean) ((currMB->qp_scaled[0] == 0) && (p_Vid->lossless_qpprime_flag == 1)); +} + +void mb_pred_ipcm(Macroblock *currMB) +{ + int i, j, k; + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + + //Copy coefficients to decoded picture buffer + //IPCM coefficients are stored in currSlice->ipcm which is set in function read_IPCM_coeffs_from_NAL() + + for(i = 0; i < MB_BLOCK_SIZE; ++i) + { + for(j = 0;j < MB_BLOCK_SIZE ; ++j) + { + dec_picture->imgY->img[currMB->pix_y + i][currMB->pix_x + j] = (imgpel) currSlice->ipcm[0][i][j]; + } + } + + if ((dec_picture->chroma_format_idc != YUV400) && !IS_INDEPENDENT(p_Vid)) + { + for (k = 0; k < 2; ++k) + { + for(i = 0; i < p_Vid->mb_cr_size_y; ++i) + { + for(j = 0;j < p_Vid->mb_cr_size_x; ++j) + { + dec_picture->imgUV[k]->img[currMB->pix_c_y+i][currMB->pix_c_x + j] = (imgpel) currSlice->ipcm[k + 1][i][j]; + } + } + } + } + + // for deblocking filter + update_qp(currMB, 0); + + // for CAVLC: Set the nz_coeff to 16. 
+ // These parameters are to be used in CAVLC decoding of neighbour blocks + memset(&p_Vid->nz_coeff[currMB->mbAddrX][0][0][0], 16, sizeof(h264_nz_coefficient)); + + // for CABAC decoding of MB skip flag + currMB->skip_flag = 0; + + //for deblocking filter CABAC + currMB->cbp_blk[0] = 0xFFFF; + + //For CABAC decoding of Dquant + currSlice->last_dquant = 0; +} + diff --git a/Src/h264dec/ldecod/src/mbuffer.c b/Src/h264dec/ldecod/src/mbuffer.c new file mode 100644 index 00000000..9784d1ef --- /dev/null +++ b/Src/h264dec/ldecod/src/mbuffer.c @@ -0,0 +1,4409 @@ + +/*! + *********************************************************************** + * \file + * mbuffer.c + * + * \brief + * Frame buffer functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Sühring <suehring@hhi.de> + * - Alexis Tourapis <alexismt@ieee.org> + * - Jill Boyce <jill.boyce@thomson.net> + * - Saurav K Bandyopadhyay <saurav@ieee.org> + * - Zhenyu Wu <Zhenyu.Wu@thomson.net + * - Purvin Pandit <Purvin.Pandit@thomson.net> + * + *********************************************************************** + */ + +#include <limits.h> + +#include "global.h" +#include "erc_api.h" +#include "header.h" +#include "image.h" +#include "mbuffer.h" +#include "memalloc.h" +#include "output.h" + + + +static void insert_picture_in_dpb (VideoParameters *p_Vid, FrameStore* fs, StorablePicture* p); +static void output_one_frame_from_dpb(VideoParameters *p_Vid); +static void get_smallest_poc (DecodedPictureBuffer *p_Dpb, int *poc,int * pos); +static void gen_field_ref_ids (StorablePicture *p); +static int remove_unused_frame_from_dpb (VideoParameters *p_Vid, DecodedPictureBuffer *p_Dpb); +static int is_used_for_reference (FrameStore* fs); +static int is_short_term_reference (FrameStore* fs); +static int is_long_term_reference (FrameStore* fs); + +#define MAX_LIST_SIZE 33 + +/*! 
+ ************************************************************************ + * \brief + * Print out list of pictures in DPB. Used for debug purposes. + ************************************************************************ + */ +static void dump_dpb(DecodedPictureBuffer *p_Dpb) +{ +#if DUMP_DPB + unsigned i; + + for (i=0; i<p_Dpb->used_size;i++) + { + printf("("); + printf("fn=%d ", p_Dpb->fs[i]->frame_num); + if (p_Dpb->fs[i]->is_used & 1) + { + if (p_Dpb->fs[i]->top_field) + printf("T: poc=%d ", p_Dpb->fs[i]->top_field->poc); + else + printf("T: poc=%d ", p_Dpb->fs[i]->frame->top_poc); + } + if (p_Dpb->fs[i]->is_used & 2) + { + if (p_Dpb->fs[i]->bottom_field) + printf("B: poc=%d ", p_Dpb->fs[i]->bottom_field->poc); + else + printf("B: poc=%d ", p_Dpb->fs[i]->frame->bottom_poc); + } + if (p_Dpb->fs[i]->is_used == 3) + printf("F: poc=%d ", p_Dpb->fs[i]->frame->poc); + printf("G: poc=%d) ", p_Dpb->fs[i]->poc); + if (p_Dpb->fs[i]->is_reference) printf ("ref (%d) ", p_Dpb->fs[i]->is_reference); + if (p_Dpb->fs[i]->is_long_term) printf ("lt_ref (%d) ", p_Dpb->fs[i]->is_reference); + if (p_Dpb->fs[i]->is_output) printf ("out "); + if (p_Dpb->fs[i]->is_used == 3) + { + if (p_Dpb->fs[i]->frame->non_existing) printf ("ne "); + } + printf ("\n"); + } +#endif +} + +/*! 
+ ************************************************************************ + * \brief + * Returns the size of the dpb depending on level and picture size + * + * + ************************************************************************ + */ +static int getDpbSize(seq_parameter_set_rbsp_t *active_sps) +{ + int pic_size = (active_sps->pic_width_in_mbs_minus1 + 1) * (active_sps->pic_height_in_map_units_minus1 + 1) * (active_sps->frame_mbs_only_flag?1:2) * 384; + + int size = 0; + + switch (active_sps->level_idc) + { + case 9: + size = 152064; + break; + case 10: + size = 152064; + break; + case 11: + if (!IS_FREXT_PROFILE(active_sps->profile_idc) && (active_sps->constrained_set3_flag == 1)) + size = 152064; + else + size = 345600; + break; + case 12: + size = 912384; + break; + case 13: + size = 912384; + break; + case 20: + size = 912384; + break; + case 21: + size = 1824768; + break; + case 22: + size = 3110400; + break; + case 30: + size = 3110400; + break; + case 31: + size = 6912000; + break; + case 32: + size = 7864320; + break; + case 40: + size = 12582912; + break; + case 41: + size = 12582912; + break; + case 42: + size = 13369344; + break; + case 50: + size = 42393600; + break; + case 51: + size = 70778880; + break; + default: + error ("undefined level", 500); + break; + } + + size /= pic_size; + size = imin( size, 16); + + if (active_sps->vui_parameters_present_flag && active_sps->vui_seq_parameters.bitstream_restriction_flag) + { + if ((int)active_sps->vui_seq_parameters.max_dec_frame_buffering > size) + { + error ("max_dec_frame_buffering larger than MaxDpbSize", 500); + } + size = imax (1, active_sps->vui_seq_parameters.max_dec_frame_buffering); + } + + return size; +} + +/*! 
+ ************************************************************************ + * \brief + * Check then number of frames marked "used for reference" and break + * if maximum is exceeded + * + ************************************************************************ + */ +void check_num_ref(DecodedPictureBuffer *p_Dpb) +{ + if ((int)(p_Dpb->ltref_frames_in_buffer + p_Dpb->ref_frames_in_buffer ) > (imax(1, p_Dpb->num_ref_frames))) + { + error ("Max. number of reference frames exceeded. Invalid stream.", 500); + } +} + + +/*! + ************************************************************************ + * \brief + * Allocate memory for decoded picture buffer and initialize with sane values. + * + ************************************************************************ + */ +void init_dpb(VideoParameters *p_Vid) +{ + unsigned i,j; + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + size_t num_output_pictures; + + if (p_Dpb->init_done) + { + free_dpb(p_Vid); + } + + p_Dpb->p_Vid = p_Vid; + p_Dpb->size = getDpbSize(active_sps); + + p_Dpb->num_ref_frames = active_sps->num_ref_frames; + + if (p_Dpb->size < active_sps->num_ref_frames) + { + error ("DPB size at specified level is smaller than the specified number of reference frames. 
This is not allowed.\n", 1000); + } + + p_Dpb->used_size = 0; + p_Dpb->last_picture = NULL; + + p_Dpb->ref_frames_in_buffer = 0; + p_Dpb->ltref_frames_in_buffer = 0; + + p_Dpb->fs = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==p_Dpb->fs) + no_mem_exit("init_dpb: dpb->fs"); + + p_Dpb->fs_ref = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==p_Dpb->fs_ref) + no_mem_exit("init_dpb: dpb->fs_ref"); + + p_Dpb->fs_ltref = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==p_Dpb->fs_ltref) + no_mem_exit("init_dpb: dpb->fs_ltref"); + + for (i=0; i<p_Dpb->size; i++) + { + p_Dpb->fs[i] = alloc_frame_store(); + p_Dpb->fs_ref[i] = NULL; + p_Dpb->fs_ltref[i] = NULL; + } + + for (i=0; i<6; i++) + { + p_Vid->listX[i] = calloc(MAX_LIST_SIZE, sizeof (StorablePicture*)); // +1 for reordering + if (NULL==p_Vid->listX[i]) + no_mem_exit("init_dpb: p_Vid->listX[i]"); + } + + /* allocate a dummy storable picture */ + p_Vid->no_reference_picture = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr); + p_Vid->no_reference_picture->top_field = p_Vid->no_reference_picture; + p_Vid->no_reference_picture->bottom_field = p_Vid->no_reference_picture; + p_Vid->no_reference_picture->frame = p_Vid->no_reference_picture; + + + for (j=0;j<6;j++) + { + for (i=0; i<MAX_LIST_SIZE; i++) + { + p_Vid->listX[j][i] = NULL; + } + p_Vid->listXsize[j]=0; + } + + p_Dpb->last_output_poc = INT_MIN; + + p_Vid->last_has_mmco_5 = 0; + + p_Dpb->init_done = 1; + + num_output_pictures = getDpbSize(active_sps) + active_sps->vui_seq_parameters.max_dec_frame_buffering; + out_storable_pictures_init(p_Vid, num_output_pictures); +} +/*! + ************************************************************************ + * \brief + * Free memory for decoded picture buffer. 
+ ************************************************************************ + */ +void free_dpb(VideoParameters *p_Vid) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + unsigned i; + if (p_Dpb->fs) + { + for (i=0; i<p_Dpb->size; i++) + { + free_frame_store(p_Vid, p_Dpb->fs[i]); + } + free (p_Dpb->fs); + p_Dpb->fs=NULL; + } + if (p_Dpb->fs_ref) + { + free (p_Dpb->fs_ref); + } + if (p_Dpb->fs_ltref) + { + free (p_Dpb->fs_ltref); + } + p_Dpb->last_output_poc = INT_MIN; + + for (i=0; i<6; i++) + if (p_Vid->listX[i]) + { + free (p_Vid->listX[i]); + p_Vid->listX[i] = NULL; + } + + p_Dpb->init_done = 0; + + free_storable_picture(p_Vid, p_Vid->no_reference_picture); +} + + +/*! + ************************************************************************ + * \brief + * Allocate memory for decoded picture buffer frame stores an initialize with sane values. + * + * \return + * the allocated FrameStore structure + ************************************************************************ + */ +FrameStore* alloc_frame_store(void) +{ + FrameStore *f; + + f = calloc (1, sizeof(FrameStore)); + if (NULL==f) + no_mem_exit("alloc_frame_store: f"); + + f->is_used = 0; + f->is_reference = 0; + f->is_long_term = 0; + f->is_orig_reference = 0; + + f->is_output = 0; + + f->frame = NULL;; + f->top_field = NULL; + f->bottom_field = NULL; + + return f; +} + + +/*! + ************************************************************************ + * \brief + * Free frame store memory. 
+ * + * \param p_Vid + * image decoding parameters for current picture + * \param f + * FrameStore to be freed + * + ************************************************************************ + */ +void free_frame_store(VideoParameters *p_Vid, FrameStore* f) +{ + if (f) + { + if (f->frame) + { + free_storable_picture(p_Vid, f->frame); + f->frame=NULL; + } + if (f->top_field) + { + free_storable_picture(p_Vid, f->top_field); + f->top_field=NULL; + } + if (f->bottom_field) + { + free_storable_picture(p_Vid, f->bottom_field); + f->bottom_field=NULL; + } + free(f); + } +} + +void free_pic_motion(VideoParameters *p_Vid, PicMotionParams *motion, int size_x, int size_y); + + +/*! + ************************************************************************ + * \brief + * mark FrameStore unused for reference + * + ************************************************************************ + */ +static void unmark_for_reference(VideoParameters *p_Vid, FrameStore* fs) +{ + + if (fs->is_used & 1) + { + if (fs->top_field) + { + fs->top_field->used_for_reference = 0; + } + } + if (fs->is_used & 2) + { + if (fs->bottom_field) + { + fs->bottom_field->used_for_reference = 0; + } + } + if (fs->is_used == 3) + { + if (fs->top_field && fs->bottom_field) + { + fs->top_field->used_for_reference = 0; + fs->bottom_field->used_for_reference = 0; + } + fs->frame->used_for_reference = 0; + } + + fs->is_reference = 0; + + if(fs->frame) + { + free_pic_motion(p_Vid, &fs->frame->motion, fs->frame->size_x, fs->frame->size_y); + } + + if (fs->top_field) + { + free_pic_motion(p_Vid, &fs->top_field->motion, fs->top_field->size_x, fs->top_field->size_y); + } + + if (fs->bottom_field) + { + free_pic_motion(p_Vid, &fs->bottom_field->motion, fs->bottom_field->size_x, fs->bottom_field->size_y); + } +} + + +/*! 
+ ************************************************************************ + * \brief + * mark FrameStore unused for reference and reset long term flags + * + ************************************************************************ + */ +static void unmark_for_long_term_reference(FrameStore* fs) +{ + + if (fs->is_used & 1) + { + if (fs->top_field) + { + fs->top_field->used_for_reference = 0; + fs->top_field->is_long_term = 0; + } + } + if (fs->is_used & 2) + { + if (fs->bottom_field) + { + fs->bottom_field->used_for_reference = 0; + fs->bottom_field->is_long_term = 0; + } + } + if (fs->is_used == 3) + { + if (fs->top_field && fs->bottom_field) + { + fs->top_field->used_for_reference = 0; + fs->top_field->is_long_term = 0; + fs->bottom_field->used_for_reference = 0; + fs->bottom_field->is_long_term = 0; + } + fs->frame->used_for_reference = 0; + fs->frame->is_long_term = 0; + } + + fs->is_reference = 0; + fs->is_long_term = 0; +} + + +/*! + ************************************************************************ + * \brief + * compares two stored pictures by picture number for qsort in descending order + * + ************************************************************************ + */ +static inline int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 ) +{ + int pic_num1 = (*(StorablePicture**)arg1)->pic_num; + int pic_num2 = (*(StorablePicture**)arg2)->pic_num; + + if (pic_num1 < pic_num2) + return 1; + if (pic_num1 > pic_num2) + return -1; + else + return 0; +} + +/*! 
+ ************************************************************************ + * \brief + * compares two stored pictures by picture number for qsort in descending order + * + ************************************************************************ + */ +static inline int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 ) +{ + int long_term_pic_num1 = (*(StorablePicture**)arg1)->long_term_pic_num; + int long_term_pic_num2 = (*(StorablePicture**)arg2)->long_term_pic_num; + + if ( long_term_pic_num1 < long_term_pic_num2) + return -1; + if ( long_term_pic_num1 > long_term_pic_num2) + return 1; + else + return 0; +} + +/*! + ************************************************************************ + * \brief + * compares two frame stores by pic_num for qsort in descending order + * + ************************************************************************ + */ +static inline int compare_fs_by_frame_num_desc( const void *arg1, const void *arg2 ) +{ + int frame_num_wrap1 = (*(FrameStore**)arg1)->frame_num_wrap; + int frame_num_wrap2 = (*(FrameStore**)arg2)->frame_num_wrap; + if ( frame_num_wrap1 < frame_num_wrap2) + return 1; + if ( frame_num_wrap1 > frame_num_wrap2) + return -1; + else + return 0; +} + + +/*! + ************************************************************************ + * \brief + * compares two frame stores by lt_pic_num for qsort in descending order + * + ************************************************************************ + */ +static inline int compare_fs_by_lt_pic_idx_asc( const void *arg1, const void *arg2 ) +{ + int long_term_frame_idx1 = (*(FrameStore**)arg1)->long_term_frame_idx; + int long_term_frame_idx2 = (*(FrameStore**)arg2)->long_term_frame_idx; + + if ( long_term_frame_idx1 < long_term_frame_idx2) + return -1; + if ( long_term_frame_idx1 > long_term_frame_idx2) + return 1; + else + return 0; +} + + +/*! 
+ ************************************************************************ + * \brief + * compares two stored pictures by poc for qsort in ascending order + * + ************************************************************************ + */ +static inline int compare_pic_by_poc_asc( const void *arg1, const void *arg2 ) +{ + int poc1 = (*(StorablePicture**)arg1)->poc; + int poc2 = (*(StorablePicture**)arg2)->poc; + + if ( poc1 < poc2) + return -1; + if ( poc1 > poc2) + return 1; + else + return 0; +} + + +/*! + ************************************************************************ + * \brief + * compares two stored pictures by poc for qsort in descending order + * + ************************************************************************ + */ +static inline int compare_pic_by_poc_desc( const void *arg1, const void *arg2 ) +{ + int poc1 = (*(StorablePicture**)arg1)->poc; + int poc2 = (*(StorablePicture**)arg2)->poc; + + if (poc1 < poc2) + return 1; + if (poc1 > poc2) + return -1; + else + return 0; +} + + +/*! + ************************************************************************ + * \brief + * compares two frame stores by poc for qsort in ascending order + * + ************************************************************************ + */ +static inline int compare_fs_by_poc_asc( const void *arg1, const void *arg2 ) +{ + int poc1 = (*(FrameStore**)arg1)->poc; + int poc2 = (*(FrameStore**)arg2)->poc; + + if (poc1 < poc2) + return -1; + if (poc1 > poc2) + return 1; + else + return 0; +} + + +/*! 
+ ************************************************************************ + * \brief + * compares two frame stores by poc for qsort in descending order + * + ************************************************************************ + */ +static inline int compare_fs_by_poc_desc( const void *arg1, const void *arg2 ) +{ + int poc1 = (*(FrameStore**)arg1)->poc; + int poc2 = (*(FrameStore**)arg2)->poc; + + if (poc1 < poc2) + return 1; + if (poc1 > poc2) + return -1; + else + return 0; +} + + +/*! + ************************************************************************ + * \brief + * returns true, if picture is short term reference picture + * + ************************************************************************ + */ +int is_short_ref(StorablePicture *s) +{ + return ((s->used_for_reference) && (!(s->is_long_term))); +} + + +/*! + ************************************************************************ + * \brief + * returns true, if picture is long term reference picture + * + ************************************************************************ + */ +int is_long_ref(StorablePicture *s) +{ + return ((s->used_for_reference) && (s->is_long_term)); +} + + +/*! 
+ ************************************************************************ + * \brief + * Generates a alternating field list from a given FrameStore list + * + ************************************************************************ + */ +static void gen_pic_list_from_frame_list(PictureStructure currStructure, FrameStore **fs_list, int list_idx, StorablePicture **list, char *list_size, int long_term) +{ + int top_idx = 0; + int bot_idx = 0; + + int (*is_ref)(StorablePicture *s); + + if (long_term) + is_ref=is_long_ref; + else + is_ref=is_short_ref; + + if (currStructure == TOP_FIELD) + { + while ((top_idx<list_idx)||(bot_idx<list_idx)) + { + for ( ; top_idx<list_idx; top_idx++) + { + if(fs_list[top_idx]->is_used & 1) + { + if(is_ref(fs_list[top_idx]->top_field)) + { + // short term ref pic + list[(short) *list_size] = fs_list[top_idx]->top_field; + (*list_size)++; + top_idx++; + break; + } + } + } + for ( ; bot_idx<list_idx; bot_idx++) + { + if(fs_list[bot_idx]->is_used & 2) + { + if(is_ref(fs_list[bot_idx]->bottom_field)) + { + // short term ref pic + list[(short) *list_size] = fs_list[bot_idx]->bottom_field; + (*list_size)++; + bot_idx++; + break; + } + } + } + } + } + if (currStructure == BOTTOM_FIELD) + { + while ((top_idx<list_idx)||(bot_idx<list_idx)) + { + for ( ; bot_idx<list_idx; bot_idx++) + { + if(fs_list[bot_idx]->is_used & 2) + { + if(is_ref(fs_list[bot_idx]->bottom_field)) + { + // short term ref pic + list[(short) *list_size] = fs_list[bot_idx]->bottom_field; + (*list_size)++; + bot_idx++; + break; + } + } + } + for ( ; top_idx<list_idx; top_idx++) + { + if(fs_list[top_idx]->is_used & 1) + { + if(is_ref(fs_list[top_idx]->top_field)) + { + // short term ref pic + list[(short) *list_size] = fs_list[top_idx]->top_field; + (*list_size)++; + top_idx++; + break; + } + } + } + } + } +} + + +/*! 
+ ************************************************************************ + * \brief + * Initialize p_Vid->listX[0] and list 1 depending on current slice type + * + ************************************************************************ + */ +void init_lists(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + int add_top = 0, add_bottom = 0; + unsigned i; + int j; + int MaxFrameNum = 1 << (active_sps->log2_max_frame_num_minus4 + 4); + int diff; + + int list0idx = 0; + int list0idx_1 = 0; + int listltidx = 0; + + FrameStore **fs_list0; + FrameStore **fs_list1; + FrameStore **fs_listlt; + + StorablePicture *tmp_s; + + if (currSlice->structure == FRAME) + { + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_used==3) + { + if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term)) + { + if( p_Dpb->fs_ref[i]->frame_num > p_Vid->frame_num ) + { + p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num - MaxFrameNum; + } + else + { + p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num; + } + p_Dpb->fs_ref[i]->frame->pic_num = p_Dpb->fs_ref[i]->frame_num_wrap; + } + } + } + // update long_term_pic_num + for (i = 0; i < p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ltref[i]->is_used==3) + { + if (p_Dpb->fs_ltref[i]->frame->is_long_term) + { + p_Dpb->fs_ltref[i]->frame->long_term_pic_num = p_Dpb->fs_ltref[i]->frame->long_term_frame_idx; + } + } + } + } + else + { + if (currSlice->structure == TOP_FIELD) + { + add_top = 1; + add_bottom = 0; + } + else + { + add_top = 0; + add_bottom = 1; + } + + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_reference) + { + if( p_Dpb->fs_ref[i]->frame_num > p_Vid->frame_num ) + { + p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num - MaxFrameNum; + } + else + { + 
p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num; + } + if (p_Dpb->fs_ref[i]->is_reference & 1) + { + p_Dpb->fs_ref[i]->top_field->pic_num = (2 * p_Dpb->fs_ref[i]->frame_num_wrap) + add_top; + } + if (p_Dpb->fs_ref[i]->is_reference & 2) + { + p_Dpb->fs_ref[i]->bottom_field->pic_num = (2 * p_Dpb->fs_ref[i]->frame_num_wrap) + add_bottom; + } + } + } + // update long_term_pic_num + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ltref[i]->is_long_term & 1) + { + p_Dpb->fs_ltref[i]->top_field->long_term_pic_num = 2 * p_Dpb->fs_ltref[i]->top_field->long_term_frame_idx + add_top; + } + if (p_Dpb->fs_ltref[i]->is_long_term & 2) + { + p_Dpb->fs_ltref[i]->bottom_field->long_term_pic_num = 2 * p_Dpb->fs_ltref[i]->bottom_field->long_term_frame_idx + add_bottom; + } + } + } + + if ((currSlice->slice_type == I_SLICE)||(currSlice->slice_type == SI_SLICE)) + { + p_Vid->listXsize[0] = 0; + p_Vid->listXsize[1] = 0; + return; + } + + if ((currSlice->slice_type == P_SLICE)||(currSlice->slice_type == SP_SLICE)) + { + // Calculate FrameNumWrap and PicNum + if (currSlice->structure == FRAME) + { + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_used==3) + { + if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term)) + { + p_Vid->listX[0][list0idx++] = p_Dpb->fs_ref[i]->frame; + } + } + } + // order list 0 by PicNum + qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc); + p_Vid->listXsize[0] = (char) list0idx; +// printf("listX[0] (PicNum): "); for (i=0; i<list0idx; i++){printf ("%d ", p_Vid->listX[0][i]->pic_num);} printf("\n"); + + // long term handling + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ltref[i]->is_used==3) + { + if (p_Dpb->fs_ltref[i]->frame->is_long_term) + { + p_Vid->listX[0][list0idx++]=p_Dpb->fs_ltref[i]->frame; + } + } + } + qsort((void *)&p_Vid->listX[0][(short) p_Vid->listXsize[0]], list0idx - 
p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + p_Vid->listXsize[0] = (char) list0idx; + } + else + { + fs_list0 = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==fs_list0) + no_mem_exit("init_lists: fs_list0"); + fs_listlt = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==fs_listlt) + no_mem_exit("init_lists: fs_listlt"); + + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_reference) + { + fs_list0[list0idx++] = p_Dpb->fs_ref[i]; + } + } + + qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_frame_num_desc); + +// printf("fs_list0 (FrameNum): "); for (i=0; i<list0idx; i++){printf ("%d ", fs_list0[i]->frame_num_wrap);} printf("\n"); + + p_Vid->listXsize[0] = 0; + gen_pic_list_from_frame_list(currSlice->structure, fs_list0, list0idx, p_Vid->listX[0], &p_Vid->listXsize[0], 0); + +// printf("p_Vid->listX[0] (PicNum): "); for (i=0; i<p_Vid->listXsize[0]; i++){printf ("%d ", p_Vid->listX[0][i]->pic_num);} printf("\n"); + + // long term handling + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + fs_listlt[listltidx++]=p_Dpb->fs_ltref[i]; + } + + qsort((void *)fs_listlt, listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc); + + gen_pic_list_from_frame_list(currSlice->structure, fs_listlt, listltidx, p_Vid->listX[0], &p_Vid->listXsize[0], 1); + + free(fs_list0); + free(fs_listlt); + } + p_Vid->listXsize[1] = 0; + } + else + { + // B-Slice + if (currSlice->structure == FRAME) + { + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_used==3) + { + if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term)) + { + if (p_Vid->framepoc >= p_Dpb->fs_ref[i]->frame->poc) //!KS use >= for error concealment +// if (p_Vid->framepoc > p_Dpb->fs_ref[i]->frame->poc) + { + p_Vid->listX[0][list0idx++] = p_Dpb->fs_ref[i]->frame; + } + } + } + } + qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), 
compare_pic_by_poc_desc); + list0idx_1 = list0idx; + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_used==3) + { + if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term)) + { + if (p_Vid->framepoc < p_Dpb->fs_ref[i]->frame->poc) + { + p_Vid->listX[0][list0idx++] = p_Dpb->fs_ref[i]->frame; + } + } + } + } + qsort((void *)&p_Vid->listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc); + + for (j=0; j<list0idx_1; j++) + { + p_Vid->listX[1][list0idx-list0idx_1+j]=p_Vid->listX[0][j]; + } + for (j=list0idx_1; j<list0idx; j++) + { + p_Vid->listX[1][j-list0idx_1]=p_Vid->listX[0][j]; + } + + p_Vid->listXsize[0] = p_Vid->listXsize[1] = (char) list0idx; + +// printf("p_Vid->listX[0] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[0]; i++){printf ("%d ", p_Vid->listX[0][i]->poc);} printf("\n"); +// printf("p_Vid->listX[1] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[1]; i++){printf ("%d ", p_Vid->listX[1][i]->poc);} printf("\n"); + + // long term handling + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ltref[i]->is_used==3) + { + if (p_Dpb->fs_ltref[i]->frame->is_long_term) + { + p_Vid->listX[0][list0idx] =p_Dpb->fs_ltref[i]->frame; + p_Vid->listX[1][list0idx++]=p_Dpb->fs_ltref[i]->frame; + } + } + } + qsort((void *)&p_Vid->listX[0][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + qsort((void *)&p_Vid->listX[1][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + p_Vid->listXsize[0] = p_Vid->listXsize[1] = (char) list0idx; + } + else + { + fs_list0 = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==fs_list0) + no_mem_exit("init_lists: fs_list0"); + fs_list1 = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==fs_list1) + no_mem_exit("init_lists: fs_list1"); + 
fs_listlt = calloc(p_Dpb->size, sizeof (FrameStore*)); + if (NULL==fs_listlt) + no_mem_exit("init_lists: fs_listlt"); + + p_Vid->listXsize[0] = 0; + p_Vid->listXsize[1] = 1; + + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_used) + { + if (p_Vid->ThisPOC >= p_Dpb->fs_ref[i]->poc) + { + fs_list0[list0idx++] = p_Dpb->fs_ref[i]; + } + } + } + qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_poc_desc); + list0idx_1 = list0idx; + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_used) + { + if (p_Vid->ThisPOC < p_Dpb->fs_ref[i]->poc) + { + fs_list0[list0idx++] = p_Dpb->fs_ref[i]; + } + } + } + qsort((void *)&fs_list0[list0idx_1], list0idx-list0idx_1, sizeof(FrameStore*), compare_fs_by_poc_asc); + + for (j=0; j<list0idx_1; j++) + { + fs_list1[list0idx-list0idx_1+j]=fs_list0[j]; + } + for (j=list0idx_1; j<list0idx; j++) + { + fs_list1[j-list0idx_1]=fs_list0[j]; + } + +// printf("fs_list0 currPoc=%d (Poc): ", p_Vid->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d ", fs_list0[i]->poc);} printf("\n"); +// printf("fs_list1 currPoc=%d (Poc): ", p_Vid->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d ", fs_list1[i]->poc);} printf("\n"); + + p_Vid->listXsize[0] = 0; + p_Vid->listXsize[1] = 0; + gen_pic_list_from_frame_list(currSlice->structure, fs_list0, list0idx, p_Vid->listX[0], &p_Vid->listXsize[0], 0); + gen_pic_list_from_frame_list(currSlice->structure, fs_list1, list0idx, p_Vid->listX[1], &p_Vid->listXsize[1], 0); + +// printf("p_Vid->listX[0] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[0]; i++){printf ("%d ", p_Vid->listX[0][i]->poc);} printf("\n"); +// printf("p_Vid->listX[1] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[1]; i++){printf ("%d ", p_Vid->listX[1][i]->poc);} printf("\n"); + + // long term handling + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + fs_listlt[listltidx++]=p_Dpb->fs_ltref[i]; + } + + qsort((void *)fs_listlt, 
listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc); + + gen_pic_list_from_frame_list(currSlice->structure, fs_listlt, listltidx, p_Vid->listX[0], &p_Vid->listXsize[0], 1); + gen_pic_list_from_frame_list(currSlice->structure, fs_listlt, listltidx, p_Vid->listX[1], &p_Vid->listXsize[1], 1); + + free(fs_list0); + free(fs_list1); + free(fs_listlt); + } + } + + if ((p_Vid->listXsize[0] == p_Vid->listXsize[1]) && (p_Vid->listXsize[0] > 1)) + { + // check if lists are identical, if yes swap first two elements of p_Vid->listX[1] + diff=0; + for (j = 0; j< p_Vid->listXsize[0]; j++) + { + if (p_Vid->listX[0][j]!=p_Vid->listX[1][j]) + diff=1; + } + if (!diff) + { + tmp_s = p_Vid->listX[1][0]; + p_Vid->listX[1][0]=p_Vid->listX[1][1]; + p_Vid->listX[1][1]=tmp_s; + } + } + // set max size + p_Vid->listXsize[0] = (char) imin (p_Vid->listXsize[0], currSlice->num_ref_idx_l0_active); + p_Vid->listXsize[1] = (char) imin (p_Vid->listXsize[1], currSlice->num_ref_idx_l1_active); + + // set the unused list entries to NULL + for (i=p_Vid->listXsize[0]; i< (MAX_LIST_SIZE) ; i++) + { + p_Vid->listX[0][i] = p_Vid->no_reference_picture; + + } + for (i=p_Vid->listXsize[1]; i< (MAX_LIST_SIZE) ; i++) + { + p_Vid->listX[1][i] = p_Vid->no_reference_picture; + } +} + +/*! 
+ ************************************************************************ + * \brief + * Initialize listX[2..5] from lists 0 and 1 + * listX[2]: list0 for current_field==top + * listX[3]: list1 for current_field==top + * listX[4]: list0 for current_field==bottom + * listX[5]: list1 for current_field==bottom + * + ************************************************************************ + */ +void init_mbaff_lists(VideoParameters *p_Vid) +{ + unsigned j; + int i; + + for (i=2;i<6;i++) + { + for (j=0; j<MAX_LIST_SIZE; j++) + { + p_Vid->listX[i][j] = p_Vid->no_reference_picture; + } + p_Vid->listXsize[i]=0; + } + + for (i=0; i<p_Vid->listXsize[0]; i++) + { + p_Vid->listX[2][2*i ] = p_Vid->listX[0][i]->top_field; + p_Vid->listX[2][2*i+1] = p_Vid->listX[0][i]->bottom_field; + p_Vid->listX[4][2*i ] = p_Vid->listX[0][i]->bottom_field; + p_Vid->listX[4][2*i+1] = p_Vid->listX[0][i]->top_field; + } + p_Vid->listXsize[2]=p_Vid->listXsize[4]=p_Vid->listXsize[0] * 2; + + for (i=0; i<p_Vid->listXsize[1]; i++) + { + p_Vid->listX[3][2*i ] = p_Vid->listX[1][i]->top_field; + p_Vid->listX[3][2*i+1] = p_Vid->listX[1][i]->bottom_field; + p_Vid->listX[5][2*i ] = p_Vid->listX[1][i]->bottom_field; + p_Vid->listX[5][2*i+1] = p_Vid->listX[1][i]->top_field; + } + p_Vid->listXsize[3]=p_Vid->listXsize[5]=p_Vid->listXsize[1] * 2; +} + + /*! 
+ ************************************************************************ + * \brief + * Returns short term pic with given picNum + * + ************************************************************************ + */ +static StorablePicture* get_short_term_pic(VideoParameters *p_Vid, int picNum) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + unsigned i; + + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Vid->structure==FRAME) + { + if (p_Dpb->fs_ref[i]->is_reference == 3) + if ((!p_Dpb->fs_ref[i]->frame->is_long_term)&&(p_Dpb->fs_ref[i]->frame->pic_num == picNum)) + return p_Dpb->fs_ref[i]->frame; + } + else + { + if (p_Dpb->fs_ref[i]->is_reference & 1) + if ((!p_Dpb->fs_ref[i]->top_field->is_long_term)&&(p_Dpb->fs_ref[i]->top_field->pic_num == picNum)) + return p_Dpb->fs_ref[i]->top_field; + if (p_Dpb->fs_ref[i]->is_reference & 2) + if ((!p_Dpb->fs_ref[i]->bottom_field->is_long_term)&&(p_Dpb->fs_ref[i]->bottom_field->pic_num == picNum)) + return p_Dpb->fs_ref[i]->bottom_field; + } + } + + return p_Vid->no_reference_picture; +} + +/*! 
+ ************************************************************************ + * \brief + * Returns long term pic with given LongtermPicNum + * + ************************************************************************ + */ +static StorablePicture* get_long_term_pic(VideoParameters *p_Vid, int LongtermPicNum) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + unsigned i; + + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Vid->structure==FRAME) + { + if (p_Dpb->fs_ltref[i]->is_reference == 3) + if ((p_Dpb->fs_ltref[i]->frame->is_long_term)&&(p_Dpb->fs_ltref[i]->frame->long_term_pic_num == LongtermPicNum)) + return p_Dpb->fs_ltref[i]->frame; + } + else + { + if (p_Dpb->fs_ltref[i]->is_reference & 1) + if ((p_Dpb->fs_ltref[i]->top_field->is_long_term)&&(p_Dpb->fs_ltref[i]->top_field->long_term_pic_num == LongtermPicNum)) + return p_Dpb->fs_ltref[i]->top_field; + if (p_Dpb->fs_ltref[i]->is_reference & 2) + if ((p_Dpb->fs_ltref[i]->bottom_field->is_long_term)&&(p_Dpb->fs_ltref[i]->bottom_field->long_term_pic_num == LongtermPicNum)) + return p_Dpb->fs_ltref[i]->bottom_field; + } + } + return NULL; +} + +/*! 
+ ************************************************************************ + * \brief + * Reordering process for short-term reference pictures + * + ************************************************************************ + */ +static void reorder_short_term(VideoParameters *p_Vid, StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int picNumLX, int *refIdxLX) +{ + int cIdx, nIdx; + + StorablePicture *picLX; + + picLX = get_short_term_pic(p_Vid, picNumLX); + + for( cIdx = num_ref_idx_lX_active_minus1+1; cIdx > *refIdxLX; cIdx-- ) + RefPicListX[ cIdx ] = RefPicListX[ cIdx - 1]; + + RefPicListX[ (*refIdxLX)++ ] = picLX; + + nIdx = *refIdxLX; + + for( cIdx = *refIdxLX; cIdx <= num_ref_idx_lX_active_minus1+1; cIdx++ ) + if (RefPicListX[ cIdx ]) + if( (RefPicListX[ cIdx ]->is_long_term ) || (RefPicListX[ cIdx ]->pic_num != picNumLX )) + RefPicListX[ nIdx++ ] = RefPicListX[ cIdx ]; + +} + + +/*! + ************************************************************************ + * \brief + * Reordering process for long-term reference pictures + * + ************************************************************************ + */ +static void reorder_long_term(VideoParameters *p_Vid, StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int LongTermPicNum, int *refIdxLX) +{ + int cIdx, nIdx; + + StorablePicture *picLX; + + picLX = get_long_term_pic(p_Vid, LongTermPicNum); + + for( cIdx = num_ref_idx_lX_active_minus1+1; cIdx > *refIdxLX; cIdx-- ) + RefPicListX[ cIdx ] = RefPicListX[ cIdx - 1]; + + RefPicListX[ (*refIdxLX)++ ] = picLX; + + nIdx = *refIdxLX; + + for( cIdx = *refIdxLX; cIdx <= num_ref_idx_lX_active_minus1+1; cIdx++ ) + if (RefPicListX[ cIdx ]) + if( (!RefPicListX[ cIdx ]->is_long_term ) || (RefPicListX[ cIdx ]->long_term_pic_num != LongTermPicNum )) + RefPicListX[ nIdx++ ] = RefPicListX[ cIdx ]; +} + + +/*! 
+ ************************************************************************ + * \brief + * Reordering process for reference picture lists + * + ************************************************************************ + */ +void reorder_ref_pic_list(VideoParameters *p_Vid, StorablePicture **list, char *list_size, int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx) +{ + int i; + + int maxPicNum, currPicNum, picNumLXNoWrap, picNumLXPred, picNumLX; + int refIdxLX = 0; + + if (p_Vid->structure==FRAME) + { + maxPicNum = p_Vid->MaxFrameNum; + currPicNum = p_Vid->frame_num; + } + else + { + maxPicNum = 2 * p_Vid->MaxFrameNum; + currPicNum = 2 * p_Vid->frame_num + 1; + } + + picNumLXPred = currPicNum; + + for (i=0; reordering_of_pic_nums_idc[i]!=3; i++) + { + if (reordering_of_pic_nums_idc[i]>3) + error ("Invalid remapping_of_pic_nums_idc command", 500); + + if (reordering_of_pic_nums_idc[i] < 2) + { + if (reordering_of_pic_nums_idc[i] == 0) + { + if( picNumLXPred - ( abs_diff_pic_num_minus1[i] + 1 ) < 0 ) + picNumLXNoWrap = picNumLXPred - ( abs_diff_pic_num_minus1[i] + 1 ) + maxPicNum; + else + picNumLXNoWrap = picNumLXPred - ( abs_diff_pic_num_minus1[i] + 1 ); + } + else // (remapping_of_pic_nums_idc[i] == 1) + { + if( picNumLXPred + ( abs_diff_pic_num_minus1[i] + 1 ) >= maxPicNum ) + picNumLXNoWrap = picNumLXPred + ( abs_diff_pic_num_minus1[i] + 1 ) - maxPicNum; + else + picNumLXNoWrap = picNumLXPred + ( abs_diff_pic_num_minus1[i] + 1 ); + } + picNumLXPred = picNumLXNoWrap; + + if( picNumLXNoWrap > currPicNum ) + picNumLX = picNumLXNoWrap - maxPicNum; + else + picNumLX = picNumLXNoWrap; + + reorder_short_term(p_Vid, list, num_ref_idx_lX_active_minus1, picNumLX, &refIdxLX); + } + else //(remapping_of_pic_nums_idc[i] == 2) + { + reorder_long_term(p_Vid, list, num_ref_idx_lX_active_minus1, long_term_pic_idx[i], &refIdxLX); + } + + } + // that's a definition + *list_size = (char)(num_ref_idx_lX_active_minus1 
+ 1); +} + + + +/*! + ************************************************************************ + * \brief + * Update the list of frame stores that contain reference frames/fields + * + ************************************************************************ + */ +void update_ref_list(DecodedPictureBuffer *p_Dpb) +{ + unsigned i, j; + for (i=0, j=0; i<p_Dpb->used_size; i++) + { + if (is_short_term_reference(p_Dpb->fs[i])) + { + p_Dpb->fs_ref[j++]=p_Dpb->fs[i]; + } + } + + p_Dpb->ref_frames_in_buffer = j; + + while (j<p_Dpb->size) + { + p_Dpb->fs_ref[j++]=NULL; + } +} + + +/*! + ************************************************************************ + * \brief + * Update the list of frame stores that contain long-term reference + * frames/fields + * + ************************************************************************ + */ +void update_ltref_list(DecodedPictureBuffer *p_Dpb) +{ + unsigned i, j; + for (i=0, j=0; i<p_Dpb->used_size; i++) + { + if (is_long_term_reference(p_Dpb->fs[i])) + { + p_Dpb->fs_ltref[j++]=p_Dpb->fs[i]; + } + } + + p_Dpb->ltref_frames_in_buffer=j; + + while (j<p_Dpb->size) + { + p_Dpb->fs_ltref[j++]=NULL; + } +} + +/*! 
+ ************************************************************************ + * \brief + * Perform Memory management for idr pictures + * + ************************************************************************ + */ +static void idr_memory_management(VideoParameters *p_Vid, StorablePicture* p) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + unsigned i; + + assert (p->idr_flag); + + if (p->no_output_of_prior_pics_flag) + { + // free all stored pictures + for (i=0; i<p_Dpb->used_size; i++) + { + // reset all reference settings + free_frame_store(p_Vid, p_Dpb->fs[i]); + p_Dpb->fs[i] = alloc_frame_store(); + } + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + p_Dpb->fs_ref[i]=NULL; + } + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + p_Dpb->fs_ltref[i]=NULL; + } + p_Dpb->used_size=0; + } + else + { + flush_dpb(p_Vid); + } + p_Dpb->last_picture = NULL; + + update_ref_list(p_Dpb); + update_ltref_list(p_Dpb); + p_Dpb->last_output_poc = INT_MIN; + + if (p->long_term_reference_flag) + { + p_Dpb->max_long_term_pic_idx = 0; + p->is_long_term = 1; + p->long_term_frame_idx = 0; + } + else + { + p_Dpb->max_long_term_pic_idx = -1; + p->is_long_term = 0; + } +} + +/*! + ************************************************************************ + * \brief + * Perform Sliding window decoded reference picture marking process + * + ************************************************************************ + */ +static void sliding_window_memory_management(DecodedPictureBuffer *p_Dpb, StorablePicture* p) +{ + unsigned i; + + assert (!p->idr_flag); + // if this is a reference pic with sliding sliding window, unmark first ref frame + if (p_Dpb->ref_frames_in_buffer==p_Dpb->num_ref_frames - p_Dpb->ltref_frames_in_buffer) + { + for (i=0; i<p_Dpb->used_size;i++) + { + if (p_Dpb->fs[i]->is_reference && (!(p_Dpb->fs[i]->is_long_term))) + { + unmark_for_reference(p_Dpb->p_Vid, p_Dpb->fs[i]); + update_ref_list(p_Dpb); + break; + } + } + } + + p->is_long_term = 0; +} + +/*! 
+ ************************************************************************ + * \brief + * Calculate picNumX + ************************************************************************ + */ +static int get_pic_num_x (StorablePicture *p, int difference_of_pic_nums_minus1) +{ + int currPicNum; + + if (p->structure == FRAME) + currPicNum = p->frame_num; + else + currPicNum = 2 * p->frame_num + 1; + + return currPicNum - (difference_of_pic_nums_minus1 + 1); +} + + +/*! + ************************************************************************ + * \brief + * Adaptive Memory Management: Mark short term picture unused + ************************************************************************ + */ +static void mm_unmark_short_term_for_reference(DecodedPictureBuffer *p_Dpb, StorablePicture *p, int difference_of_pic_nums_minus1) +{ + int picNumX; + + unsigned i; + + picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1); + + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p->structure == FRAME) + { + if ((p_Dpb->fs_ref[i]->is_reference==3) && (p_Dpb->fs_ref[i]->is_long_term==0)) + { + if (p_Dpb->fs_ref[i]->frame->pic_num == picNumX) + { + unmark_for_reference(p_Dpb->p_Vid, p_Dpb->fs_ref[i]); + return; + } + } + } + else + { + if ((p_Dpb->fs_ref[i]->is_reference & 1) && (!(p_Dpb->fs_ref[i]->is_long_term & 1))) + { + if (p_Dpb->fs_ref[i]->top_field->pic_num == picNumX) + { + p_Dpb->fs_ref[i]->top_field->used_for_reference = 0; + p_Dpb->fs_ref[i]->is_reference &= 2; + if (p_Dpb->fs_ref[i]->is_used == 3) + { + p_Dpb->fs_ref[i]->frame->used_for_reference = 0; + } + return; + } + } + if ((p_Dpb->fs_ref[i]->is_reference & 2) && (!(p_Dpb->fs_ref[i]->is_long_term & 2))) + { + if (p_Dpb->fs_ref[i]->bottom_field->pic_num == picNumX) + { + p_Dpb->fs_ref[i]->bottom_field->used_for_reference = 0; + p_Dpb->fs_ref[i]->is_reference &= 1; + if (p_Dpb->fs_ref[i]->is_used == 3) + { + p_Dpb->fs_ref[i]->frame->used_for_reference = 0; + } + return; + } + } + } + } +} + + +/*! 
+ ************************************************************************ + * \brief + * Adaptive Memory Management: Mark long term picture unused + ************************************************************************ + */ +static void mm_unmark_long_term_for_reference(DecodedPictureBuffer *p_Dpb, StorablePicture *p, int long_term_pic_num) +{ + unsigned i; + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p->structure == FRAME) + { + if ((p_Dpb->fs_ltref[i]->is_reference==3) && (p_Dpb->fs_ltref[i]->is_long_term==3)) + { + if (p_Dpb->fs_ltref[i]->frame->long_term_pic_num == long_term_pic_num) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + } + } + else + { + if ((p_Dpb->fs_ltref[i]->is_reference & 1) && ((p_Dpb->fs_ltref[i]->is_long_term & 1))) + { + if (p_Dpb->fs_ltref[i]->top_field->long_term_pic_num == long_term_pic_num) + { + p_Dpb->fs_ltref[i]->top_field->used_for_reference = 0; + p_Dpb->fs_ltref[i]->top_field->is_long_term = 0; + p_Dpb->fs_ltref[i]->is_reference &= 2; + p_Dpb->fs_ltref[i]->is_long_term &= 2; + if (p_Dpb->fs_ltref[i]->is_used == 3) + { + p_Dpb->fs_ltref[i]->frame->used_for_reference = 0; + p_Dpb->fs_ltref[i]->frame->is_long_term = 0; + } + return; + } + } + if ((p_Dpb->fs_ltref[i]->is_reference & 2) && ((p_Dpb->fs_ltref[i]->is_long_term & 2))) + { + if (p_Dpb->fs_ltref[i]->bottom_field->long_term_pic_num == long_term_pic_num) + { + p_Dpb->fs_ltref[i]->bottom_field->used_for_reference = 0; + p_Dpb->fs_ltref[i]->bottom_field->is_long_term = 0; + p_Dpb->fs_ltref[i]->is_reference &= 1; + p_Dpb->fs_ltref[i]->is_long_term &= 1; + if (p_Dpb->fs_ltref[i]->is_used == 3) + { + p_Dpb->fs_ltref[i]->frame->used_for_reference = 0; + p_Dpb->fs_ltref[i]->frame->is_long_term = 0; + } + return; + } + } + } + } +} + + +/*! 
+ ************************************************************************ + * \brief + * Mark a long-term reference frame or complementary field pair unused for referemce + ************************************************************************ + */ +static void unmark_long_term_frame_for_reference_by_frame_idx(DecodedPictureBuffer *p_Dpb, int long_term_frame_idx) +{ + unsigned i; + for(i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ltref[i]->long_term_frame_idx == long_term_frame_idx) + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } +} + +/*! + ************************************************************************ + * \brief + * Mark a long-term reference field unused for reference only if it's not + * the complementary field of the picture indicated by picNumX + ************************************************************************ + */ +static void unmark_long_term_field_for_reference_by_frame_idx(VideoParameters *p_Vid, PictureStructure structure, int long_term_frame_idx, int mark_current, unsigned curr_frame_num, int curr_pic_num) +{ + unsigned i; + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + + assert(structure!=FRAME); + if (curr_pic_num<0) + curr_pic_num+=(2*p_Vid->MaxFrameNum); + + for(i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ltref[i]->long_term_frame_idx == long_term_frame_idx) + { + if (structure == TOP_FIELD) + { + if ((p_Dpb->fs_ltref[i]->is_long_term == 3)) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + else + { + if ((p_Dpb->fs_ltref[i]->is_long_term == 1)) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + else + { + if (mark_current) + { + if (p_Dpb->last_picture) + { + if ( ( p_Dpb->last_picture != p_Dpb->fs_ltref[i] )|| p_Dpb->last_picture->frame_num != curr_frame_num) + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + else + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + } + else + { + if ((p_Dpb->fs_ltref[i]->frame_num) != 
(unsigned)(curr_pic_num/2)) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + } + } + } + } + if (structure == BOTTOM_FIELD) + { + if ((p_Dpb->fs_ltref[i]->is_long_term == 3)) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + else + { + if ((p_Dpb->fs_ltref[i]->is_long_term == 2)) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + else + { + if (mark_current) + { + if (p_Dpb->last_picture) + { + if ( ( p_Dpb->last_picture != p_Dpb->fs_ltref[i] )|| p_Dpb->last_picture->frame_num != curr_frame_num) + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + else + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + } + else + { + if ((p_Dpb->fs_ltref[i]->frame_num) != (unsigned)(curr_pic_num/2)) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + } + } + } + } + } + } +} + + +/*! + ************************************************************************ + * \brief + * mark a picture as long-term reference + ************************************************************************ + */ +static void mark_pic_long_term(DecodedPictureBuffer *p_Dpb, StorablePicture* p, int long_term_frame_idx, int picNumX) +{ + unsigned i; + int add_top, add_bottom; + + if (p->structure == FRAME) + { + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_reference == 3) + { + if ((!p_Dpb->fs_ref[i]->frame->is_long_term)&&(p_Dpb->fs_ref[i]->frame->pic_num == picNumX)) + { + p_Dpb->fs_ref[i]->long_term_frame_idx = p_Dpb->fs_ref[i]->frame->long_term_frame_idx + = long_term_frame_idx; + p_Dpb->fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx; + p_Dpb->fs_ref[i]->frame->is_long_term = 1; + + if (p_Dpb->fs_ref[i]->top_field && p_Dpb->fs_ref[i]->bottom_field) + { + p_Dpb->fs_ref[i]->top_field->long_term_frame_idx = p_Dpb->fs_ref[i]->bottom_field->long_term_frame_idx + = long_term_frame_idx; + p_Dpb->fs_ref[i]->top_field->long_term_pic_num = long_term_frame_idx; + 
p_Dpb->fs_ref[i]->bottom_field->long_term_pic_num = long_term_frame_idx; + + p_Dpb->fs_ref[i]->top_field->is_long_term = p_Dpb->fs_ref[i]->bottom_field->is_long_term + = 1; + + } + p_Dpb->fs_ref[i]->is_long_term = 3; + return; + } + } + } + printf ("Warning: reference frame for long term marking not found\n"); + } + else + { + if (p->structure == TOP_FIELD) + { + add_top = 1; + add_bottom = 0; + } + else + { + add_top = 0; + add_bottom = 1; + } + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_reference & 1) + { + if ((!p_Dpb->fs_ref[i]->top_field->is_long_term)&&(p_Dpb->fs_ref[i]->top_field->pic_num == picNumX)) + { + if ((p_Dpb->fs_ref[i]->is_long_term) && (p_Dpb->fs_ref[i]->long_term_frame_idx != long_term_frame_idx)) + { + printf ("Warning: assigning long_term_frame_idx different from other field\n"); + } + + p_Dpb->fs_ref[i]->long_term_frame_idx = p_Dpb->fs_ref[i]->top_field->long_term_frame_idx + = long_term_frame_idx; + p_Dpb->fs_ref[i]->top_field->long_term_pic_num = 2 * long_term_frame_idx + add_top; + p_Dpb->fs_ref[i]->top_field->is_long_term = 1; + p_Dpb->fs_ref[i]->is_long_term |= 1; + if (p_Dpb->fs_ref[i]->is_long_term == 3) + { + p_Dpb->fs_ref[i]->frame->is_long_term = 1; + p_Dpb->fs_ref[i]->frame->long_term_frame_idx = p_Dpb->fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx; + } + return; + } + } + if (p_Dpb->fs_ref[i]->is_reference & 2) + { + if ((!p_Dpb->fs_ref[i]->bottom_field->is_long_term)&&(p_Dpb->fs_ref[i]->bottom_field->pic_num == picNumX)) + { + if ((p_Dpb->fs_ref[i]->is_long_term) && (p_Dpb->fs_ref[i]->long_term_frame_idx != long_term_frame_idx)) + { + printf ("Warning: assigning long_term_frame_idx different from other field\n"); + } + + p_Dpb->fs_ref[i]->long_term_frame_idx = p_Dpb->fs_ref[i]->bottom_field->long_term_frame_idx + = long_term_frame_idx; + p_Dpb->fs_ref[i]->bottom_field->long_term_pic_num = 2 * long_term_frame_idx + add_bottom; + p_Dpb->fs_ref[i]->bottom_field->is_long_term = 1; + 
p_Dpb->fs_ref[i]->is_long_term |= 2; + if (p_Dpb->fs_ref[i]->is_long_term == 3) + { + p_Dpb->fs_ref[i]->frame->is_long_term = 1; + p_Dpb->fs_ref[i]->frame->long_term_frame_idx = p_Dpb->fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx; + } + return; + } + } + } + printf ("Warning: reference field for long term marking not found\n"); + } +} + + +/*! + ************************************************************************ + * \brief + * Assign a long term frame index to a short term picture + ************************************************************************ + */ +static void mm_assign_long_term_frame_idx(VideoParameters *p_Vid, StorablePicture* p, int difference_of_pic_nums_minus1, int long_term_frame_idx) +{ + int picNumX; + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + + picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1); + + // remove frames/fields with same long_term_frame_idx + if (p->structure == FRAME) + { + unmark_long_term_frame_for_reference_by_frame_idx(p_Dpb, long_term_frame_idx); + } + else + { + unsigned i; + PictureStructure structure = FRAME; + + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->is_reference & 1) + { + if (p_Dpb->fs_ref[i]->top_field->pic_num == picNumX) + { + structure = TOP_FIELD; + break; + } + } + if (p_Dpb->fs_ref[i]->is_reference & 2) + { + if (p_Dpb->fs_ref[i]->bottom_field->pic_num == picNumX) + { + structure = BOTTOM_FIELD; + break; + } + } + } + if (structure==FRAME) + { + error ("field for long term marking not found",200); + } + + unmark_long_term_field_for_reference_by_frame_idx(p_Vid, structure, long_term_frame_idx, 0, 0, picNumX); + } + + mark_pic_long_term(p_Dpb, p, long_term_frame_idx, picNumX); +} + +/*! 
+ ************************************************************************ + * \brief + * Set new max long_term_frame_idx + ************************************************************************ + */ +void mm_update_max_long_term_frame_idx(DecodedPictureBuffer *p_Dpb, int max_long_term_frame_idx_plus1) +{ + unsigned i; + + p_Dpb->max_long_term_pic_idx = max_long_term_frame_idx_plus1 - 1; + + // check for invalid frames + for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ltref[i]->long_term_frame_idx > p_Dpb->max_long_term_pic_idx) + { + unmark_for_long_term_reference(p_Dpb->fs_ltref[i]); + } + } +} + + +/*! + ************************************************************************ + * \brief + * Mark all long term reference pictures unused for reference + ************************************************************************ + */ +static void mm_unmark_all_long_term_for_reference (DecodedPictureBuffer *p_Dpb) +{ + mm_update_max_long_term_frame_idx(p_Dpb, 0); +} + +/*! + ************************************************************************ + * \brief + * Mark all short term reference pictures unused for reference + ************************************************************************ + */ +static void mm_unmark_all_short_term_for_reference (DecodedPictureBuffer *p_Dpb) +{ + unsigned int i; + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + unmark_for_reference(p_Dpb->p_Vid, p_Dpb->fs_ref[i]); + } + update_ref_list(p_Dpb); +} + + +/*! 
+ ************************************************************************ + * \brief + * Mark the current picture used for long term reference + ************************************************************************ + */ +static void mm_mark_current_picture_long_term(VideoParameters *p_Vid, DecodedPictureBuffer *p_Dpb, StorablePicture *p, int long_term_frame_idx) +{ + // remove long term pictures with same long_term_frame_idx + if (p->structure == FRAME) + { + unmark_long_term_frame_for_reference_by_frame_idx(p_Dpb, long_term_frame_idx); + } + else + { + unmark_long_term_field_for_reference_by_frame_idx(p_Vid, p->structure, long_term_frame_idx, 1, p->pic_num, 0); + } + + p->is_long_term = 1; + p->long_term_frame_idx = long_term_frame_idx; +} + + +/*! + ************************************************************************ + * \brief + * Perform Adaptive memory control decoded reference picture marking process + ************************************************************************ + */ +static void adaptive_memory_management(VideoParameters *p_Vid, StorablePicture* p) +{ + DecRefPicMarking_t *tmp_drpm; + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + + p_Vid->last_has_mmco_5 = 0; + + assert (!p->idr_flag); + assert (p->adaptive_ref_pic_buffering_flag); + + while (p->dec_ref_pic_marking_buffer) + { + tmp_drpm = p->dec_ref_pic_marking_buffer; + switch (tmp_drpm->memory_management_control_operation) + { + case 0: + if (tmp_drpm->Next != NULL) + { + error ("memory_management_control_operation = 0 not last operation in buffer", 500); + } + break; + case 1: + mm_unmark_short_term_for_reference(p_Dpb, p, tmp_drpm->difference_of_pic_nums_minus1); + update_ref_list(p_Dpb); + break; + case 2: + mm_unmark_long_term_for_reference(p_Dpb, p, tmp_drpm->long_term_pic_num); + update_ltref_list(p_Dpb); + break; + case 3: + mm_assign_long_term_frame_idx(p_Vid, p, tmp_drpm->difference_of_pic_nums_minus1, tmp_drpm->long_term_frame_idx); + update_ref_list(p_Dpb); + 
update_ltref_list(p_Dpb); + break; + case 4: + mm_update_max_long_term_frame_idx (p_Dpb, tmp_drpm->max_long_term_frame_idx_plus1); + update_ltref_list(p_Dpb); + break; + case 5: + mm_unmark_all_short_term_for_reference(p_Dpb); + mm_unmark_all_long_term_for_reference(p_Dpb); + p_Vid->last_has_mmco_5 = 1; + break; + case 6: + mm_mark_current_picture_long_term(p_Vid, p_Dpb, p, tmp_drpm->long_term_frame_idx); + check_num_ref(p_Dpb); + break; + default: + error ("invalid memory_management_control_operation in buffer", 500); + } + p->dec_ref_pic_marking_buffer = tmp_drpm->Next; + free (tmp_drpm); + } + if ( p_Vid->last_has_mmco_5 ) + { + p->pic_num = p->frame_num = 0; + + switch (p->structure) + { + case TOP_FIELD: + { + p->poc = p->top_poc = p_Vid->toppoc =0; + break; + } + case BOTTOM_FIELD: + { + p->poc = p->bottom_poc = p_Vid->bottompoc = 0; + break; + } + case FRAME: + { + p->top_poc -= p->poc; + p->bottom_poc -= p->poc; + + p_Vid->toppoc = p->top_poc; + p_Vid->bottompoc = p->bottom_poc; + + p->poc = imin (p->top_poc, p->bottom_poc); + p_Vid->framepoc = p->poc; + break; + } + } + p_Vid->ThisPOC = p->poc; + flush_dpb(p_Vid); + } +} + + +/*! + ************************************************************************ + * \brief + * Store a picture in DPB. This includes cheking for space in DPB and + * flushing frames. + * If we received a frame, we need to check for a new store, if we + * got a field, check if it's the second field of an already allocated + * store. 
+ * + * \param p_Vid + * image decoding parameters for current picture + * \param p + * Picture to be stored + * + ************************************************************************ + */ + +void store_picture_in_dpb(VideoParameters *p_Vid, StorablePicture* p) +{ + DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb; + unsigned i; + int poc, pos; + // picture error concealment + + // diagnostics + //printf ("Storing (%s) non-ref pic with frame_num #%d\n", (p->type == FRAME)?"FRAME":(p->type == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", p->pic_num); + // if frame, check for new store, + assert (p!=NULL); + + p_Vid->last_has_mmco_5=0; + p_Vid->last_pic_bottom_field = (p->structure == BOTTOM_FIELD); + + if (p->idr_flag) + { + idr_memory_management(p_Vid, p); + // picture error concealment + memset(p_Vid->pocs_in_dpb, 0, sizeof(int)*100); + } + else + { + // adaptive memory management + if (p->used_for_reference && (p->adaptive_ref_pic_buffering_flag)) + adaptive_memory_management(p_Vid, p); + } + + if ((p->structure==TOP_FIELD)||(p->structure==BOTTOM_FIELD)) + { + // check for frame store with same pic_number + if (p_Dpb->last_picture) + { + if ((int)p_Dpb->last_picture->frame_num == p->pic_num) + { + if (((p->structure==TOP_FIELD)&&(p_Dpb->last_picture->is_used==2))||((p->structure==BOTTOM_FIELD)&&(p_Dpb->last_picture->is_used==1))) + { + if ((p->used_for_reference && (p_Dpb->last_picture->is_orig_reference!=0))|| + (!p->used_for_reference && (p_Dpb->last_picture->is_orig_reference==0))) + { + insert_picture_in_dpb(p_Vid, p_Dpb->last_picture, p); + update_ref_list(p_Dpb); + update_ltref_list(p_Dpb); + dump_dpb(p_Dpb); + p_Dpb->last_picture = NULL; + return; + } + } + } + } + } + + // this is a frame or a field which has no stored complementary field + + // sliding window, if necessary + if ((!p->idr_flag)&&(p->used_for_reference && (!p->adaptive_ref_pic_buffering_flag))) + { + sliding_window_memory_management(p_Dpb, p); + } + + // first try to remove unused frames + if 
(p_Dpb->used_size==p_Dpb->size) + { + remove_unused_frame_from_dpb(p_Vid, p_Dpb); + } + + // then output frames until one can be removed + while (p_Dpb->used_size == p_Dpb->size) + { + // non-reference frames may be output directly + if (!p->used_for_reference) + { + get_smallest_poc(p_Dpb, &poc, &pos); + if ((-1==pos) || (p->poc < poc)) + { + direct_output(p_Vid, p); + return; + } + } + // flush a frame + output_one_frame_from_dpb(p_Vid); + } + + // check for duplicate frame number in short term reference buffer + if ((p->used_for_reference)&&(!p->is_long_term)) + { + for (i=0; i<p_Dpb->ref_frames_in_buffer; i++) + { + if (p_Dpb->fs_ref[i]->frame_num == p->frame_num) + { + //error("duplicate frame_num in short-term reference picture buffer", 500); + //printf("warning"); + //return; + } + } + + } + // store at end of buffer + insert_picture_in_dpb(p_Vid, p_Dpb->fs[p_Dpb->used_size],p); + + // picture error concealment + if (p->idr_flag) + { + p_Vid->earlier_missing_poc = 0; + } + + if (p->structure != FRAME) + { + p_Dpb->last_picture = p_Dpb->fs[p_Dpb->used_size]; + } + else + { + p_Dpb->last_picture = NULL; + } + + p_Dpb->used_size++; + + update_ref_list(p_Dpb); + update_ltref_list(p_Dpb); + + check_num_ref(p_Dpb); + + dump_dpb(p_Dpb); +} + +/*! + ************************************************************************ + * \brief + * Insert the picture into the DPB. A free DPB position is necessary + * for frames, . 
+ * + * \param p_Vid + * image decoding parameters for current picture + * \param fs + * FrameStore into which the picture will be inserted + * \param p + * StorablePicture to be inserted + * + ************************************************************************ + */ +static void insert_picture_in_dpb(VideoParameters *p_Vid, FrameStore* fs, StorablePicture* p) +{ + InputParameters *p_Inp = p_Vid->p_Inp; +// printf ("insert (%s) pic with frame_num #%d, poc %d\n", (p->structure == FRAME)?"FRAME":(p->structure == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", p->pic_num, p->poc); + assert (p!=NULL); + assert (fs!=NULL); + switch (p->structure) + { + case FRAME: + fs->frame = p; + fs->is_used = 3; + if (p->used_for_reference) + { + fs->is_reference = 3; + fs->is_orig_reference = 3; + if (p->is_long_term) + { + fs->is_long_term = 3; + fs->long_term_frame_idx = p->long_term_frame_idx; + } + } + // generate field views + dpb_split_field(p_Vid, fs); + break; + case TOP_FIELD: + fs->top_field = p; + fs->is_used |= 1; + if (p->used_for_reference) + { + fs->is_reference |= 1; + fs->is_orig_reference |= 1; + if (p->is_long_term) + { + fs->is_long_term |= 1; + fs->long_term_frame_idx = p->long_term_frame_idx; + } + } + if (fs->is_used == 3) + { + // generate frame view + dpb_combine_field(p_Vid, fs); + fs->frame->time_code = p->time_code; + } else + { + fs->poc = p->poc; + gen_field_ref_ids(p); + } + break; + case BOTTOM_FIELD: + fs->bottom_field = p; + fs->is_used |= 2; + if (p->used_for_reference) + { + fs->is_reference |= 2; + fs->is_orig_reference |= 2; + if (p->is_long_term) + { + fs->is_long_term |= 2; + fs->long_term_frame_idx = p->long_term_frame_idx; + } + } + if (fs->is_used == 3) + { + // generate frame view + dpb_combine_field(p_Vid, fs); + fs->frame->time_code = p->time_code; + } + else + { + fs->poc = p->poc; + gen_field_ref_ids(p); + } + break; + } + fs->frame_num = p->pic_num; + fs->recovery_frame = p->recovery_frame; + + fs->is_output = p->is_output; +} + +/*! 
+ ************************************************************************ + * \brief + * Check if one of the frames/fields in frame store is used for reference + ************************************************************************ + */ +static int is_used_for_reference(FrameStore* fs) +{ + if (fs->is_reference) + { + return 1; + } + + if (fs->is_used == 3) // frame + { + if (fs->frame->used_for_reference) + { + return 1; + } + } + + if (fs->is_used & 1) // top field + { + if (fs->top_field) + { + if (fs->top_field->used_for_reference) + { + return 1; + } + } + } + + if (fs->is_used & 2) // bottom field + { + if (fs->bottom_field) + { + if (fs->bottom_field->used_for_reference) + { + return 1; + } + } + } + return 0; +} + + +/*! + ************************************************************************ + * \brief + * Check if one of the frames/fields in frame store is used for short-term reference + ************************************************************************ + */ +static int is_short_term_reference(FrameStore* fs) +{ + + if (fs->is_used==3) // frame + { + if ((fs->frame->used_for_reference)&&(!fs->frame->is_long_term)) + { + return 1; + } + } + + if (fs->is_used & 1) // top field + { + if (fs->top_field) + { + if ((fs->top_field->used_for_reference)&&(!fs->top_field->is_long_term)) + { + return 1; + } + } + } + + if (fs->is_used & 2) // bottom field + { + if (fs->bottom_field) + { + if ((fs->bottom_field->used_for_reference)&&(!fs->bottom_field->is_long_term)) + { + return 1; + } + } + } + return 0; +} + + +/*! 
 ************************************************************************
 * \brief
 *    Check if one of the frames/fields in frame store is used for
 *    long-term reference
 *
 * \return
 *    1 if the frame (is_used == 3) or either present field has both
 *    used_for_reference and is_long_term set; 0 otherwise
 ************************************************************************
 */
static int is_long_term_reference(FrameStore* fs)
{

  if (fs->is_used==3) // frame
  {
    if ((fs->frame->used_for_reference)&&(fs->frame->is_long_term))
    {
      return 1;
    }
  }

  if (fs->is_used & 1) // top field
  {
    if (fs->top_field)
    {
      if ((fs->top_field->used_for_reference)&&(fs->top_field->is_long_term))
      {
        return 1;
      }
    }
  }

  if (fs->is_used & 2) // bottom field
  {
    if (fs->bottom_field)
    {
      if ((fs->bottom_field->used_for_reference)&&(fs->bottom_field->is_long_term))
      {
        return 1;
      }
    }
  }
  return 0;
}


/*!
 ************************************************************************
 * \brief
 *    remove one frame from DPB
 *
 *    Frees the pictures held by the frame store at \a pos (which ones
 *    depends on is_used), clears its usage/reference flags, moves the
 *    emptied store behind the remaining used entries and decrements
 *    used_size.
 *
 * \param p_Vid
 *    decoder state owning the DPB
 * \param pos
 *    index into p_Dpb->fs of the store to release
 ************************************************************************
 */
static void remove_frame_from_dpb(VideoParameters *p_Vid, int pos)
{
  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
  FrameStore* fs = p_Dpb->fs[pos];
  FrameStore* tmp;
  unsigned i;

//  printf ("remove frame with frame_num #%d\n", fs->frame_num);
  switch (fs->is_used)
  {
  case 3:
    // frame view plus both field views
    free_storable_picture(p_Vid, fs->frame);
    free_storable_picture(p_Vid, fs->top_field);
    free_storable_picture(p_Vid, fs->bottom_field);
    fs->frame=NULL;
    fs->top_field=NULL;
    fs->bottom_field=NULL;
    break;
  case 2:
    // bottom field only
    free_storable_picture(p_Vid, fs->bottom_field);
    fs->bottom_field=NULL;
    break;
  case 1:
    // top field only
    free_storable_picture(p_Vid, fs->top_field);
    fs->top_field=NULL;
    break;
  case 0:
    break;
  default:
    error("invalid frame store type",500);
  }
  fs->is_used = 0;
  fs->is_long_term = 0;
  fs->is_reference = 0;
  fs->is_orig_reference = 0;

  // move empty framestore to end of buffer
  tmp = p_Dpb->fs[pos];

  for (i=pos; i<p_Dpb->used_size-1;i++)
  {
    p_Dpb->fs[i] = p_Dpb->fs[i+1];
  }
  p_Dpb->fs[p_Dpb->used_size-1] = tmp;
  p_Dpb->used_size--;
}

/*!
 ************************************************************************
 * \brief
 *    find smallest POC in the DPB.
 *
 *    Only frame stores that have not been output yet are considered.
 *
 * \param p_Dpb
 *    decoded picture buffer to scan
 * \param poc
 *    receives the smallest POC found, or INT_MAX if none qualifies
 * \param pos
 *    receives the index of that frame store, or -1 if none qualifies
 ************************************************************************
 */
static void get_smallest_poc(DecodedPictureBuffer *p_Dpb, int *poc,int * pos)
{
  unsigned i;

  if (p_Dpb->used_size<1)
  {
    error("Cannot determine smallest POC, DPB empty.",150);
  }

  *pos=-1;
  *poc = INT_MAX;
  for (i=0; i<p_Dpb->used_size; i++)
  {
    if ((*poc > p_Dpb->fs[i]->poc)&&(!p_Dpb->fs[i]->is_output))
    {
      *poc = p_Dpb->fs[i]->poc;
      *pos=i;
    }
  }
}

/*!
 ************************************************************************
 * \brief
 *    Remove a picture from DPB which is no longer needed.
 *
 *    At most one frame store is removed per call: the first one that
 *    was already output and is not used for reference.
 *
 * \return
 *    1 if a frame store was removed, 0 if none qualified
 ************************************************************************
 */
static int remove_unused_frame_from_dpb(VideoParameters *p_Vid, DecodedPictureBuffer *p_Dpb)
{
  unsigned i;

  // check for frames that were already output and no longer used for reference
  for (i = 0; i < p_Dpb->used_size; i++)
  {
    if (p_Dpb->fs[i]->is_output && (!is_used_for_reference(p_Dpb->fs[i])))
    {
      remove_frame_from_dpb(p_Vid, i);
      return 1;
    }
  }
  return 0;
}

/*!
 ************************************************************************
 * \brief
 *    Output one picture stored in the DPB.
 *
 *    The not-yet-output frame store with the smallest POC is passed to
 *    write_stored_frame(); it is removed from the DPB afterwards only
 *    if it is not used for reference.
 ************************************************************************
 */
static void output_one_frame_from_dpb(VideoParameters *p_Vid)
{
  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
  int poc, pos;
  //diagnostics
  if (p_Dpb->used_size<1)
  {
    error("Cannot output frame, DPB empty.",150);
  }

  // find smallest POC
  get_smallest_poc(p_Dpb, &poc, &pos);

  if(pos==-1)
  {
    error("no frames for output available", 150);
  }

  // call the output function
//  printf ("output frame with frame_num #%d, poc %d (dpb. p_Dpb->size=%d, p_Dpb->used_size=%d)\n", p_Dpb->fs[pos]->frame_num, p_Dpb->fs[pos]->frame->poc, p_Dpb->size, p_Dpb->used_size);


// JVT-P072 ends

  write_stored_frame(p_Vid, p_Dpb->fs[pos]);


  // NOTE(review): the ascending-POC check below is disabled; the branch is empty
  if (p_Dpb->last_output_poc >= poc)
  {
    //printf("warning");
    //error ("output POC must be in ascending order", 150);
  }
  p_Dpb->last_output_poc = poc;
  // free frame store and move empty store to end of buffer
  if (!is_used_for_reference(p_Dpb->fs[pos]))
  {
    remove_frame_from_dpb(p_Vid, pos);
  }
}



/*!
 ************************************************************************
 * \brief
 *    All stored picture are output. Should be called to empty the buffer
 *
 *    Every frame store is first unmarked for reference, stores that
 *    were already output are removed, and the remaining ones are output
 *    one at a time until used_size reaches zero. last_output_poc is
 *    reset to INT_MIN at the end.
 ************************************************************************
 */
void flush_dpb(VideoParameters *p_Vid)
{
  unsigned i;
  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;

  //diagnostics
//  printf("Flush remaining frames from dpb. p_Dpb->size=%d, p_Dpb->used_size=%d\n",p_Dpb->size,p_Dpb->used_size);


  // mark all frames unused
  for (i=0; i<p_Dpb->used_size; i++)
  {
    unmark_for_reference (p_Vid, p_Dpb->fs[i]);
  }

  // each call removes at most one store, so repeat until nothing is removed
  while (remove_unused_frame_from_dpb(p_Vid, p_Dpb)) ;

  // output frames in POC order
  while (p_Dpb->used_size)
  {
    output_one_frame_from_dpb(p_Vid);
  }

  p_Dpb->last_output_poc = INT_MIN;
}


/*!
 ************************************************************************
 * \brief
 *    For a field picture, fill in ref_id for both lists of every motion
 *    entry from ref_pic_num (0 when ref_idx is negative) and set the
 *    entry's field_frame flag to 1.
 *
 * \param p
 *    field picture whose motion data is updated in place
 ************************************************************************
 */
static void gen_field_ref_ids(StorablePicture *p)
{
  int i,j, dummylist0, dummylist1;
  //! Generate Frame parameters from field information.
  for (i=0 ; i<p->size_x/4 ; i++)
  {
    for (j=0 ; j<p->size_y/4 ; j++)
    {
      dummylist0= p->motion.motion[LIST_0][j][i].ref_idx;
      dummylist1= p->motion.motion[LIST_1][j][i].ref_idx;
      //! association with id already known for fields.
      p->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? p->ref_pic_num[p->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0;
      p->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? p->ref_pic_num[p->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0;
      p->motion.field_frame[j][i]=1;
    }
  }
}

/*!
 ************************************************************************
 * \brief
 *    Extract the top and bottom field views from a stored frame.
 *
 *    When frame_mbs_only_flag is clear, two field pictures are allocated,
 *    the even/odd luma and chroma rows of the frame are copied into them,
 *    and POC, reference and long-term attributes are propagated. The
 *    frame's own ref_id entries are always filled in; field motion data
 *    is derived only when the field pictures exist.
 *
 * \param p_Vid
 *    decoder state, passed on to alloc_storable_picture()
 * \param fs
 *    frame store whose frame member is already set
 ************************************************************************
 */
void dpb_split_field(VideoParameters *p_Vid, FrameStore *fs)
{
  int i, j, ii, jj, jj4;
  int idiv,jdiv;
  int currentmb;
  int dummylist0, dummylist1;
  int twosz16 = 2 * (fs->frame->size_x >> 4);
  StorablePicture *fs_top, *fs_btm;
  StorablePicture *frame = fs->frame;


  fs->poc = frame->poc;

  if (!frame->frame_mbs_only_flag)
  {
    fs_top = fs->top_field    = alloc_storable_picture(p_Vid, TOP_FIELD,    frame->size_x, frame->size_y, frame->size_x_cr, frame->size_y_cr);
    fs_btm = fs->bottom_field = alloc_storable_picture(p_Vid, BOTTOM_FIELD, frame->size_x, frame->size_y, frame->size_x_cr, frame->size_y_cr);

    // top field: even rows of the frame
    for (i = 0; i < (frame->size_y>>1); i++)
    {
      memcpy(fs_top->imgY->img[i], frame->imgY->img[i*2], frame->size_x*sizeof(imgpel));
    }

    for (i = 0; i< (frame->size_y_cr>>1); i++)
    {
      memcpy(fs_top->imgUV[0]->img[i], frame->imgUV[0]->img[i*2], frame->size_x_cr*sizeof(imgpel));
      memcpy(fs_top->imgUV[1]->img[i], frame->imgUV[1]->img[i*2], frame->size_x_cr*sizeof(imgpel));
    }

    // bottom field: odd rows of the frame
    for (i = 0; i < (frame->size_y>>1); i++)
    {
      memcpy(fs_btm->imgY->img[i], frame->imgY->img[i*2 + 1], frame->size_x*sizeof(imgpel));
    }

    for (i = 0; i < (frame->size_y_cr>>1); i++)
    {
      memcpy(fs_btm->imgUV[0]->img[i], frame->imgUV[0]->img[i*2 + 1], frame->size_x_cr*sizeof(imgpel));
      memcpy(fs_btm->imgUV[1]->img[i], frame->imgUV[1]->img[i*2 + 1], frame->size_x_cr*sizeof(imgpel));
    }

    fs_top->poc = frame->top_poc;
    fs_btm->poc = frame->bottom_poc;

    fs_top->frame_poc =  frame->frame_poc;

    fs_top->bottom_poc = fs_btm->bottom_poc =  frame->bottom_poc;
    fs_top->top_poc    = fs_btm->top_poc    =  frame->top_poc;
    fs_btm->frame_poc  = frame->frame_poc;

    fs_top->used_for_reference = fs_btm->used_for_reference
                               = frame->used_for_reference;
    fs_top->is_long_term = fs_btm->is_long_term
                         = frame->is_long_term;
    fs->long_term_frame_idx = fs_top->long_term_frame_idx
                            = fs_btm->long_term_frame_idx
                            = frame->long_term_frame_idx;

    fs_top->coded_frame = fs_btm->coded_frame = 1;
    fs_top->mb_aff_frame_flag = fs_btm->mb_aff_frame_flag
                              = frame->mb_aff_frame_flag;

    // cross-link frame and field views
    frame->top_field    = fs_top;
    frame->bottom_field = fs_btm;

    fs_top->bottom_field = fs_btm;
    fs_top->frame        = frame;
    fs_btm->top_field = fs_top;
    fs_btm->frame     = frame;

    fs_top->chroma_format_idc = fs_btm->chroma_format_idc = frame->chroma_format_idc;

    //store reference picture index
    // NOTE(review): each copy moves 66 entries starting at LIST_0; judging by
    // the commented-out 33-entry LIST_1 copies, this presumably covers
    // LIST_0 and LIST_1 in one go - confirm against the ref_pic_num declaration
    for (j=0; j<=frame->max_slice_id; j++)
    {
      memcpy(&fs_top->ref_pic_num[j][LIST_0][0], &frame->ref_pic_num[j][2 + LIST_0][0], 66 * sizeof(h264_ref_t));
      //memcpy(&fs_top->ref_pic_num[j][LIST_1][0], &frame->ref_pic_num[j][2 + LIST_1][0], 33 * sizeof(int64));
      memcpy(&fs_btm->ref_pic_num[j][LIST_0][0], &frame->ref_pic_num[j][4 + LIST_0][0], 66 * sizeof(h264_ref_t));
      //memcpy(&fs_btm->ref_pic_num[j][LIST_1][0], &frame->ref_pic_num[j][4 + LIST_1][0], 33 * sizeof(int64));
    }
  }
  else
  {
    fs_top=NULL;
    fs_btm=NULL;
    frame->top_field=NULL;
    frame->bottom_field=NULL;
  }

  if (!frame->mb_aff_frame_flag)
  {
    // TODO: benski> this part of the function is kinda slow and the compiler
    // isn't too good at optimizing it
    // can probably optimize by using CMOV eax, -1 in the inner loop
    // NOTE(review): ii/jj below shadow the function-scope ii/jj, and 'motion'
    // is not referenced anywhere in this block
    int ii, jj;
    PicMotionParams *motion = &frame->motion;
    // walks the motion grid in 4x4 tiles (j and i advance by 4; jj and ii
    // index within the tile), looking up the slice id once per tile column
    for (j = 0; (j < frame->size_y >> 2) ; j+=4)
    {
      const short *slicej = frame->slice_id[j>>2];
      for (jj = 0;jj<4;jj++)
      {
        PicMotion *ref0=frame->motion.motion[LIST_0][j+jj];
        PicMotion *ref1=frame->motion.motion[LIST_1][j+jj];
        for (i = 0 ; i < (frame->size_x >> 2) ; i+=4)
        {
          short slice = slicej[i>>2];
          const h264_ref_t *ref_pic_num0 = frame->ref_pic_num[slice][LIST_0];
          const h264_ref_t *ref_pic_num1 = frame->ref_pic_num[slice][LIST_1];
          for (ii=0;ii<4;ii++)
          {
            // negative ref_idx yields ref_id -1
            dummylist0 = ref0[i+ii].ref_idx;
            ref0[i+ii].ref_id = (dummylist0>=0)? ref_pic_num0[dummylist0] : -1;
            dummylist1 = ref1[i+ii].ref_idx;
            ref1[i+ii].ref_id = (dummylist1>=0)? ref_pic_num1[dummylist1] : -1;
          }
        }
      }
    }
  }
  else
  {
    for (j = 0; (j < frame->size_y >> 2) ; j++)
    {
      jdiv = j >> 2;
      for (i = 0 ; i < (frame->size_x >> 2) ; i++)
      {
        idiv = (i >> 2);
        // index into mb_field for the entry at (idiv, jdiv)
        currentmb = twosz16*(jdiv >> 1)+ (idiv)*2 + (jdiv & 0x01);

        if (frame->motion.mb_field[currentmb])
        {
          int list_offset = currentmb&1;
          dummylist0 = frame->motion.motion[LIST_0][j][i].ref_idx;
          dummylist1 = frame->motion.motion[LIST_1][j][i].ref_idx;
          //! association with id already known for fields.
          frame->motion.field_references[2*list_offset  ][j][i]= (dummylist0>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_0 + list_offset*2 + 2][dummylist0] : 0;
          frame->motion.field_references[2*list_offset+1][j][i]= (dummylist1>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_1 + list_offset*2 + 2][dummylist1] : 0;
          //! need to make association with frames
          frame->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->frm_ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_0 + list_offset*2 + 2][dummylist0] : 0;
          frame->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->frm_ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_1 + list_offset*2 + 2][dummylist1] : 0;

        }
        else
        {
          dummylist0 = frame->motion.motion[LIST_0][j][i].ref_idx;
          dummylist1 = frame->motion.motion[LIST_1][j][i].ref_idx;
          frame->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_0][dummylist0] : -1;
          frame->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_1][dummylist1] : -1;
        }
      }
    }
  }

  if (!frame->frame_mbs_only_flag)
  {
    if (frame->mb_aff_frame_flag)
    {
      PicMotionParams *frm_motion = &frame->motion;
      PicMotionParams *top_motion = &fs_top->motion;
      PicMotionParams *btm_motion = &fs_btm->motion;
      for (j=0 ; j< (frame->size_y >> 3); j++)
      {
        // field row j reads frame row jj for the top field and jj+4 for the bottom field
        jj = (j >> 2)*8 + (j & 0x03);
        jj4 = jj + 4;
        jdiv = (j >> 1);
        for (i=0 ; i < (frame->size_x>>2); i++)
        {
          idiv = (i >> 2);

          currentmb = twosz16*(jdiv >> 1)+ (idiv)*2 + (jdiv & 0x01);
          // Assign field mvs attached to MB-Frame buffer to the proper buffer
          if (frm_motion->mb_field[currentmb])
          {
            btm_motion->field_frame[j][i] = top_motion->field_frame[j][i]=1;
            frm_motion->field_frame[2*j][i] = frm_motion->field_frame[2*j+1][i]=1;

            btm_motion->motion[LIST_0][j][i].mv[0] = frm_motion->motion[LIST_0][jj4][i].mv[0];
            btm_motion->motion[LIST_0][j][i].mv[1] = frm_motion->motion[LIST_0][jj4][i].mv[1];
            btm_motion->motion[LIST_1][j][i].mv[0] = frm_motion->motion[LIST_1][jj4][i].mv[0];
            btm_motion->motion[LIST_1][j][i].mv[1] = frm_motion->motion[LIST_1][jj4][i].mv[1];
            btm_motion->motion[LIST_0][j][i].ref_idx = frm_motion->motion[LIST_0][jj4][i].ref_idx;
            btm_motion->motion[LIST_1][j][i].ref_idx = frm_motion->motion[LIST_1][jj4][i].ref_idx;
            btm_motion->motion[LIST_0][j][i].ref_id = frm_motion->field_references[2][jj4][i];
            btm_motion->motion[LIST_1][j][i].ref_id = frm_motion->field_references[3][jj4][i];


            top_motion->motion[LIST_0][j][i].mv[0] = frm_motion->motion[LIST_0][jj][i].mv[0];
            top_motion->motion[LIST_0][j][i].mv[1] = frm_motion->motion[LIST_0][jj][i].mv[1];
            top_motion->motion[LIST_1][j][i].mv[0] = frm_motion->motion[LIST_1][jj][i].mv[0];
            top_motion->motion[LIST_1][j][i].mv[1] = frm_motion->motion[LIST_1][jj][i].mv[1];
            top_motion->motion[LIST_0][j][i].ref_idx = frm_motion->motion[LIST_0][jj][i].ref_idx;
            top_motion->motion[LIST_1][j][i].ref_idx = frm_motion->motion[LIST_1][jj][i].ref_idx;
            top_motion->motion[LIST_0][j][i].ref_id = frm_motion->field_references[0][jj][i];
            top_motion->motion[LIST_1][j][i].ref_id = frm_motion->field_references[1][jj][i];
          }
        }
      }
    }

    //! Generate field MVs from Frame MVs
    for (j=0 ; j < (frame->size_y >> 3) ; j++)
    {
      // NOTE(review): RSD() is defined outside this chunk; its mapping is not visible here
      jj = 2* RSD(j);
      jdiv = (j >> 1);
      for (i=0 ; i < (frame->size_x >> 2) ; i++)
      {
        ii = RSD(i);
        idiv = (i >> 2);

        currentmb = twosz16 * (jdiv >> 1)+ (idiv)*2 + (jdiv & 0x01);

        if (!frame->mb_aff_frame_flag  || !frame->motion.mb_field[currentmb])
        {
          frame->motion.field_frame[2*j+1][i] = frame->motion.field_frame[2*j][i]=0;

          fs_top->motion.field_frame[j][i] = fs_btm->motion.field_frame[j][i] = 0;

          fs_top->motion.motion[LIST_0][j][i].mv[0] = fs_btm->motion.motion[LIST_0][j][i].mv[0] = frame->motion.motion[LIST_0][jj][ii].mv[0];
          fs_top->motion.motion[LIST_0][j][i].mv[1] = fs_btm->motion.motion[LIST_0][j][i].mv[1] = frame->motion.motion[LIST_0][jj][ii].mv[1];
          fs_top->motion.motion[LIST_1][j][i].mv[0] = fs_btm->motion.motion[LIST_1][j][i].mv[0] = frame->motion.motion[LIST_1][jj][ii].mv[0];
          fs_top->motion.motion[LIST_1][j][i].mv[1] = fs_btm->motion.motion[LIST_1][j][i].mv[1] = frame->motion.motion[LIST_1][jj][ii].mv[1];

          // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
          if (frame->motion.motion[LIST_0][jj][ii].ref_idx == -1)
            fs_top->motion.motion[LIST_0][j][i].ref_idx = fs_btm->motion.motion[LIST_0][j][i].ref_idx = - 1;
          else
          {
            dummylist0=fs_top->motion.motion[LIST_0][j][i].ref_idx = fs_btm->motion.motion[LIST_0][j][i].ref_idx = frame->motion.motion[LIST_0][jj][ii].ref_idx;
            fs_top->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->top_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_0][dummylist0] : 0;
            fs_btm->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->bottom_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_0][dummylist0] : 0;
          }

          if (frame->motion.motion[LIST_1][jj][ii].ref_idx == -1)
            fs_top->motion.motion[LIST_1][j][i].ref_idx = fs_btm->motion.motion[LIST_1][j][i].ref_idx = - 1;
          else
          {
            dummylist1=fs_top->motion.motion[LIST_1][j][i].ref_idx = fs_btm->motion.motion[LIST_1][j][i].ref_idx = frame->motion.motion[LIST_1][jj][ii].ref_idx;

            fs_top->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->top_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_1][dummylist1] : 0;
            fs_btm->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->bottom_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_1][dummylist1] : 0;
          }
        }
        else
        {
          frame->motion.field_frame[2*j+1][i] = frame->motion.field_frame[2*j][i]= frame->motion.mb_field[currentmb];
        }
      }
    }
  }
  else
  {
    // no field views exist: clear the frame's field_frame flags
    memset( &(frame->motion.field_frame[0][0]), 0, (frame->size_y * frame->size_x >> 4) * sizeof(byte));
  }
}


/*!
 ************************************************************************
 * \brief
 *    Generate a frame from top and bottom fields,
 *    YUV components and display information only
 *
 *    Allocates fs->frame at twice the field height, interleaves the
 *    field rows (top field on even rows, bottom field on odd rows) and
 *    derives POC, reference, long-term and cropping attributes from the
 *    fields. Motion data is not touched here.
 *
 * \param p_Vid
 *    decoder state, passed on to alloc_storable_picture()
 * \param fs
 *    frame store holding both top_field and bottom_field
 ************************************************************************
 */
void dpb_combine_field_yuv(VideoParameters *p_Vid, FrameStore *fs)
{
  int i, j;

  fs->frame = alloc_storable_picture(p_Vid, FRAME, fs->top_field->size_x, fs->top_field->size_y*2, fs->top_field->size_x_cr, fs->top_field->size_y_cr*2);

  for (i=0; i<fs->top_field->size_y; i++)
  {
    memcpy(fs->frame->imgY->img[i*2],     fs->top_field->imgY->img[i]   , fs->top_field->size_x * sizeof(imgpel));     // top field
    memcpy(fs->frame->imgY->img[i*2 + 1], fs->bottom_field->imgY->img[i], fs->bottom_field->size_x * sizeof(imgpel)); // bottom field
  }

  // both chroma planes
  for (j = 0; j < 2; j++)
  {
    for (i=0; i<fs->top_field->size_y_cr; i++)
    {
      memcpy(fs->frame->imgUV[j]->img[i*2],     fs->top_field->imgUV[j]->img[i],    fs->top_field->size_x_cr*sizeof(imgpel));
      memcpy(fs->frame->imgUV[j]->img[i*2 + 1], fs->bottom_field->imgUV[j]->img[i], fs->bottom_field->size_x_cr*sizeof(imgpel));
    }
  }

  // frame POC is the smaller of the two field POCs
  fs->poc=fs->frame->poc =fs->frame->frame_poc = imin (fs->top_field->poc, fs->bottom_field->poc);

  fs->bottom_field->frame_poc=fs->top_field->frame_poc=fs->frame->poc;

  fs->bottom_field->top_poc=fs->frame->top_poc=fs->top_field->poc;
  fs->top_field->bottom_poc=fs->frame->bottom_poc=fs->bottom_field->poc;

  // the frame is a (long-term) reference only if both fields are
  fs->frame->used_for_reference = (fs->top_field->used_for_reference && fs->bottom_field->used_for_reference );
  fs->frame->is_long_term = (fs->top_field->is_long_term && fs->bottom_field->is_long_term );

  if (fs->frame->is_long_term)
    fs->frame->long_term_frame_idx = fs->long_term_frame_idx;

  fs->frame->top_field    = fs->top_field;
  fs->frame->bottom_field = fs->bottom_field;

  fs->frame->coded_frame = 0;

  // format and cropping information is taken from the top field
  fs->frame->chroma_format_idc = fs->top_field->chroma_format_idc;
  fs->frame->frame_cropping_flag = fs->top_field->frame_cropping_flag;
  if (fs->frame->frame_cropping_flag)
  {
    fs->frame->frame_cropping_rect_top_offset = fs->top_field->frame_cropping_rect_top_offset;
    fs->frame->frame_cropping_rect_bottom_offset = fs->top_field->frame_cropping_rect_bottom_offset;
    fs->frame->frame_cropping_rect_left_offset = fs->top_field->frame_cropping_rect_left_offset;
    fs->frame->frame_cropping_rect_right_offset = fs->top_field->frame_cropping_rect_right_offset;
  }

  fs->top_field->frame = fs->bottom_field->frame = fs->frame;
}


/*!
+ ************************************************************************ + * \brief + * Generate a frame from top and bottom fields + ************************************************************************ + */ +void dpb_combine_field(VideoParameters *p_Vid, FrameStore *fs) +{ + int i,j, k, jj, jj4; + int dummylist0, dummylist1; + + dpb_combine_field_yuv(p_Vid, fs); + + + //combine field for frame + for (j=0; j<=(imax(fs->top_field->max_slice_id, fs->bottom_field->max_slice_id)); j++) + { + for (k = LIST_0; k <= LIST_1; k++) + { + for (i=0;i<16;i++) + { + fs->frame->ref_pic_num[j][k][i]= (h264_ref_t) i64min ((fs->top_field->ref_pic_num[j][k][2*i]/2)*2, (fs->bottom_field->ref_pic_num[j][k][2*i]/2)*2); + } + } + } + + //! Use inference flag to remap mvs/references + + //! Generate Frame parameters from field information. + for (j=0 ; j < (fs->top_field->size_y >> 2) ; j++) + { + jj = 8*(j >> 2) + (j & 0x03); + jj4 = jj + 4; + for (i=0 ; i< (fs->top_field->size_x >> 2) ; i++) + { + fs->frame->motion.field_frame[jj][i]= fs->frame->motion.field_frame[jj4][i]=1; + + fs->frame->motion.motion[LIST_0][jj][i].mv[0] = fs->top_field->motion.motion[LIST_0][j][i].mv[0]; + fs->frame->motion.motion[LIST_0][jj][i].mv[1] = fs->top_field->motion.motion[LIST_0][j][i].mv[1]; + fs->frame->motion.motion[LIST_1][jj][i].mv[0] = fs->top_field->motion.motion[LIST_1][j][i].mv[0]; + fs->frame->motion.motion[LIST_1][jj][i].mv[1] = fs->top_field->motion.motion[LIST_1][j][i].mv[1]; + + dummylist0=fs->frame->motion.motion[LIST_0][jj][i].ref_idx = fs->top_field->motion.motion[LIST_0][j][i].ref_idx; + dummylist1=fs->frame->motion.motion[LIST_1][jj][i].ref_idx = fs->top_field->motion.motion[LIST_1][j][i].ref_idx; + + //! association with id already known for fields. + fs->top_field->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? fs->top_field->ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0; + fs->top_field->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? 
fs->top_field->ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0; + + //! need to make association with frames + fs->frame->motion.motion[LIST_0][jj][i].ref_id = (dummylist0>=0)? fs->top_field->frm_ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0; + fs->frame->motion.motion[LIST_1][jj][i].ref_id = (dummylist1>=0)? fs->top_field->frm_ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0; + + fs->frame->motion.motion[LIST_0][jj4][i].mv[0] = fs->bottom_field->motion.motion[LIST_0][j][i].mv[0]; + fs->frame->motion.motion[LIST_0][jj4][i].mv[1] = fs->bottom_field->motion.motion[LIST_0][j][i].mv[1] ; + fs->frame->motion.motion[LIST_1][jj4][i].mv[0] = fs->bottom_field->motion.motion[LIST_1][j][i].mv[0]; + fs->frame->motion.motion[LIST_1][jj4][i].mv[1] = fs->bottom_field->motion.motion[LIST_1][j][i].mv[1] ; + + dummylist0=fs->frame->motion.motion[LIST_0][jj4][i].ref_idx = fs->bottom_field->motion.motion[LIST_0][j][i].ref_idx; + dummylist1=fs->frame->motion.motion[LIST_1][jj4][i].ref_idx = fs->bottom_field->motion.motion[LIST_1][j][i].ref_idx; + + fs->bottom_field->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? fs->bottom_field->ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0; + fs->bottom_field->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? fs->bottom_field->ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0; + + //! need to make association with frames + fs->frame->motion.motion[LIST_0][jj4][i].ref_id = (dummylist0>=0)? fs->bottom_field->frm_ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : -1; + fs->frame->motion.motion[LIST_1][jj4][i].ref_id = (dummylist1>=0)? fs->bottom_field->frm_ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : -1; + + fs->top_field->motion.field_frame[j][i]=1; + fs->bottom_field->motion.field_frame[j][i]=1; + } + } +} + + +/*! 
+ ************************************************************************ + * \brief + * Allocate memory for buffering of reference picture reordering commands + ************************************************************************ + */ +void alloc_ref_pic_list_reordering_buffer(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + int size = currSlice->num_ref_idx_l0_active + 1; + + if (p_Vid->type!=I_SLICE && p_Vid->type!=SI_SLICE) + { + if ((currSlice->reordering_of_pic_nums_idc_l0 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: reordering_of_pic_nums_idc_l0"); + if ((currSlice->abs_diff_pic_num_minus1_l0 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l0"); + if ((currSlice->long_term_pic_idx_l0 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l0"); + } + else + { + currSlice->reordering_of_pic_nums_idc_l0 = NULL; + currSlice->abs_diff_pic_num_minus1_l0 = NULL; + currSlice->long_term_pic_idx_l0 = NULL; + } + + size = currSlice->num_ref_idx_l1_active+1; + + if (p_Vid->type==B_SLICE) + { + if ((currSlice->reordering_of_pic_nums_idc_l1 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: reordering_of_pic_nums_idc_l1"); + if ((currSlice->abs_diff_pic_num_minus1_l1 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l1"); + if ((currSlice->long_term_pic_idx_l1 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l1"); + } + else + { + currSlice->reordering_of_pic_nums_idc_l1 = NULL; + currSlice->abs_diff_pic_num_minus1_l1 = NULL; + currSlice->long_term_pic_idx_l1 = NULL; + } +} + + +/*! 
+ ************************************************************************ + * \brief + * Free memory for buffering of reference picture reordering commands + ************************************************************************ + */ +void free_ref_pic_list_reordering_buffer(Slice *currSlice) +{ + + if (currSlice->reordering_of_pic_nums_idc_l0) + free(currSlice->reordering_of_pic_nums_idc_l0); + if (currSlice->abs_diff_pic_num_minus1_l0) + free(currSlice->abs_diff_pic_num_minus1_l0); + if (currSlice->long_term_pic_idx_l0) + free(currSlice->long_term_pic_idx_l0); + + currSlice->reordering_of_pic_nums_idc_l0 = NULL; + currSlice->abs_diff_pic_num_minus1_l0 = NULL; + currSlice->long_term_pic_idx_l0 = NULL; + + if (currSlice->reordering_of_pic_nums_idc_l1) + free(currSlice->reordering_of_pic_nums_idc_l1); + if (currSlice->abs_diff_pic_num_minus1_l1) + free(currSlice->abs_diff_pic_num_minus1_l1); + if (currSlice->long_term_pic_idx_l1) + free(currSlice->long_term_pic_idx_l1); + + currSlice->reordering_of_pic_nums_idc_l1 = NULL; + currSlice->abs_diff_pic_num_minus1_l1 = NULL; + currSlice->long_term_pic_idx_l1 = NULL; +} + +/*! 
+ ************************************************************************ + * \brief + * Tian Dong + * June 13, 2002, Modifed on July 30, 2003 + * + * If a gap in frame_num is found, try to fill the gap + * \param p_Vid + * + ************************************************************************ + */ +// TODO: benski> pass in timecode +void fill_frame_num_gap(VideoParameters *p_Vid) +{ + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + int CurrFrameNum; + int UnusedShortTermFrameNum; + StorablePicture *picture = NULL; + int tmp1 = p_Vid->delta_pic_order_cnt[0]; + int tmp2 = p_Vid->delta_pic_order_cnt[1]; + p_Vid->delta_pic_order_cnt[0] = p_Vid->delta_pic_order_cnt[1] = 0; + +// printf("A gap in frame number is found, try to fill it.\n"); + + UnusedShortTermFrameNum = (p_Vid->pre_frame_num + 1) % p_Vid->MaxFrameNum; + CurrFrameNum = p_Vid->frame_num; + + while (CurrFrameNum != UnusedShortTermFrameNum) + { + picture = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr); + picture->coded_frame = 1; + picture->pic_num = UnusedShortTermFrameNum; + picture->frame_num = UnusedShortTermFrameNum; + picture->non_existing = 1; + picture->is_output = 1; + picture->used_for_reference = 1; + + picture->adaptive_ref_pic_buffering_flag = 0; + + p_Vid->frame_num = UnusedShortTermFrameNum; + if (active_sps->pic_order_cnt_type!=0) + { + decode_poc(p_Vid); + } + picture->top_poc=p_Vid->toppoc; + picture->bottom_poc=p_Vid->bottompoc; + picture->frame_poc=p_Vid->framepoc; + picture->poc=p_Vid->framepoc; + + store_picture_in_dpb(p_Vid, picture); + + picture=NULL; + p_Vid->pre_frame_num = UnusedShortTermFrameNum; + UnusedShortTermFrameNum = (UnusedShortTermFrameNum + 1) % p_Vid->MaxFrameNum; + } + p_Vid->delta_pic_order_cnt[0] = tmp1; + p_Vid->delta_pic_order_cnt[1] = tmp2; + p_Vid->frame_num = CurrFrameNum; + +} + +/*! 
+ ************************************************************************ + * \brief + * Allocate motion parameter memory for colocated structure + * + ************************************************************************ + */ +#define ROUNDUP2(size) (((size)+1) & ~1) +void alloc_motion_params(VideoParameters *p_Vid, MotionParams *ftype, int size_y, int size_x) +{ + ftype->motion[0] = 0; + ftype->motion[1] = 0; + if (motion_cache_dimensions_match(&p_Vid->motion_cache, size_x, size_y)) + { + ftype->motion[0] = motion_cache_get(&p_Vid->motion_cache); + ftype->motion[1] = motion_cache_get(&p_Vid->motion_cache); + } + if (!ftype->motion[0]) + get_mem2DPicMotion(&(ftype->motion[0]), size_y, size_x); + if (!ftype->motion[1]) + get_mem2DPicMotion(&(ftype->motion[1]), size_y, size_x); + + get_mem2D (&(ftype->moving_block) , ROUNDUP2(size_y), ROUNDUP2(size_x)); +} + +/*! + ************************************************************************ + * \brief + * Allocate co-located memory + * + * \param size_x + * horizontal luma size + * \param size_y + * vertical luma size + * \param mb_adaptive_frame_field_flag + * flag that indicates macroblock adaptive frame/field coding + * + * \return + * the allocated StorablePicture structure + ************************************************************************ + */ +ColocatedParams* alloc_colocated(VideoParameters *p_Vid, int size_x, int size_y, int mb_adaptive_frame_field_flag) +{ + ColocatedParams *s; + + s = calloc(1, sizeof(ColocatedParams)); + if (NULL == s) + no_mem_exit("alloc_colocated: s"); + + s->size_x = size_x; + s->size_y = size_y; + + alloc_motion_params(p_Vid, &s->frame, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + + if (mb_adaptive_frame_field_flag) + { + alloc_motion_params(p_Vid, &s->top , size_y / (BLOCK_SIZE * 2), size_x / BLOCK_SIZE); + alloc_motion_params(p_Vid, &s->bottom, size_y / (BLOCK_SIZE * 2), size_x / BLOCK_SIZE); + } + + s->mb_adaptive_frame_field_flag = mb_adaptive_frame_field_flag; + + return 
s; +} + +/*! + ************************************************************************ + * \brief + * Free co-located memory. + * + * \param p + * Picture to be freed + * + ************************************************************************ + */ +void free_colocated(VideoParameters *p_Vid, ColocatedParams* p) +{ + if (p) + { + if (motion_cache_dimensions_match(&p_Vid->motion_cache, p_Vid->width / BLOCK_SIZE, p_Vid->height / BLOCK_SIZE)) + { + motion_cache_add(&p_Vid->motion_cache,p->frame.motion[0]); + motion_cache_add(&p_Vid->motion_cache,p->frame.motion[1]); + } + else + { + free_mem2DPicMotion(p->frame.motion[0]); + free_mem2DPicMotion(p->frame.motion[1]); + } + + if (p->frame.moving_block) + { + free_mem2D (p->frame.moving_block); + p->frame.moving_block=NULL; + } + + if (p->mb_adaptive_frame_field_flag) + { + free_mem2DPicMotion(p->top.motion[0]); + free_mem2DPicMotion(p->top.motion[1]); + + if (p->top.moving_block) + { + free_mem2D (p->top.moving_block); + p->top.moving_block=NULL; + } + + free_mem2DPicMotion(p->bottom.motion[0]); + free_mem2DPicMotion(p->bottom.motion[1]); + + if (p->bottom.moving_block) + { + free_mem2D (p->bottom.moving_block); + p->bottom.moving_block=NULL; + } + } + + free(p); + + p = NULL; + } +} + +/*! 
+ ************************************************************************ + * \brief + * Compute co-located motion info + * + ************************************************************************ + */ +void compute_colocated (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]) +{ + StorablePicture *fs = listX[LIST_1 ][0]; + int i,j, ii, jj; + int fs_size_x4 = (fs->size_x >> 2); + int fs_size_y4 = (fs->size_y >> 2); + MotionParams *p_motion = &p->frame; + PicMotionParams *p_frm_motion = &fs->motion; + VideoParameters *p_Vid = currSlice->p_Vid; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + p->is_long_term = fs->is_long_term; + + if (active_sps->direct_8x8_inference_flag) + { + // Spatial Direct + if (currSlice->direct_spatial_mv_pred_flag == 1) + { + if (p->is_long_term) + { + for (j=0 ; j < fs_size_y4; j++) + { + for (i=0 ; i < fs_size_x4; i++) + { + p_motion->moving_block[j][i]= (byte) 1; + } + } + } + else + { + PicMotion **motion0 = p_frm_motion->motion[LIST_0]; + PicMotion **motion1 = p_frm_motion->motion[LIST_1]; + for (j=0 ; j < fs_size_y4; j+=2) + { + PicMotion *ref0, *ref1; + byte *moving_block = p_motion->moving_block[j]; + byte *moving_block2 = p_motion->moving_block[j+1]; + jj = RSD(j); + ref0 = motion0[jj]; + ref1 = motion1[jj]; + for (i=0 ; i < fs_size_x4; i+=2) + { + PicMotion *r0; + ii = RSD(i); + r0 = &ref0[ii]; + if (((r0->ref_idx == 0) && (iabs(r0->mv[0])>>1 == 0) && (iabs(r0->mv[1])>>1 == 0))) + { + moving_block[i]=0; + moving_block[i+1]=0; + moving_block2[i]=0; + moving_block2[i+1]=0; + } + else if (r0->ref_idx == -1) + { + PicMotion *r1 = &ref1[ii]; + if ((r1->ref_idx == 0) && (iabs(r1->mv[0])>>1 == 0) && (iabs(r1->mv[1])>>1 == 0)) + { + moving_block[i]=0; + moving_block[i+1]=0; + moving_block2[i]=0; + moving_block2[i+1]=0; + } + else + { + moving_block[i]=1; + moving_block[i+1]=1; + moving_block2[i]=1; + moving_block2[i+1]=1; + } + } + else + { + moving_block[i]=1; + moving_block[i+1]=1; + moving_block2[i]=1; + 
moving_block2[i+1]=1; + } + /* + p_motion->moving_block[j][i]= (byte) ( + !(((ref0[ii].ref_idx == 0) + && (iabs(ref0[ii].mv[0])>>1 == 0) + && (iabs(ref0[ii].mv[1])>>1 == 0))) + || ((ref0[ii].ref_idx == -1) + && (ref1[ii].ref_idx == 0) + && (iabs(ref1[ii].mv[0])>>1 == 0) + && (iabs(ref1[ii].mv[1])>>1 == 0)))); + */ + } + } + } + } + else + { + int k; + + for (k = LIST_0; k<=LIST_1; k++) + { + for (j = 0; j < fs_size_y4; j++) + { + for (i=0;i<fs_size_x4;i++) + { + PicMotion *src = &p_frm_motion->motion[k][j][i]; + PicMotion *dest = &p_motion->motion[k][j][i]; + memcpy(&dest->mv, &src->mv, sizeof(MotionVector)); + dest->ref_idx = src->ref_idx; + dest->ref_pic_id = src->ref_id; + } + } + } + + // temporal direct + for (j=0 ; j < fs_size_y4; j++) + { + jj = RSD(j); + for (i=0 ; i < fs_size_x4; i++) + { + ii = RSD(i); + + p_motion->motion[LIST_0][j][i] = p_motion->motion[LIST_0][jj][ii]; + p_motion->motion[LIST_1][j][i] = p_motion->motion[LIST_1][jj][ii]; + //p_motion->mv[LIST_0][j][i].mv[0] = p_motion->mv[LIST_0][jj][ii][0]; + //p_motion->mv[LIST_0][j][i].mv[1] = p_motion->mv[LIST_0][jj][ii][1]; + //p_motion->mv[LIST_1][j][i].mv[0] = p_motion->mv[LIST_1][jj][ii][0]; + //p_motion->mv[LIST_1][j][i][1] = p_motion->mv[LIST_1][jj][ii][1]; + + //p_motion->ref_idx[LIST_0][j][i] = p_motion->ref_idx[LIST_0][jj][ii]; + //p_motion->ref_idx[LIST_1][j][i] = p_motion->ref_idx[LIST_1][jj][ii]; + //p_motion->ref_pic_id[LIST_0][j][i] = p_motion->ref_pic_id[LIST_0][jj][ii]; + //p_motion->ref_pic_id[LIST_1][j][i] = p_motion->ref_pic_id[LIST_1][jj][ii]; + } + } + } + } + else + { + for (j=0 ; j < fs_size_y4; j++) + { + for (i=0 ; i < fs_size_x4; i++) + { + memcpy(&p_motion->motion[LIST_0][j][i].mv, &p_frm_motion->motion[LIST_0][j][i].mv, sizeof(MotionVector)); + p_motion->motion[LIST_0][j][i].ref_idx= p_frm_motion->motion[LIST_0][j][i].ref_idx; + p_motion->motion[LIST_0][j][i].ref_pic_id= p_frm_motion->motion[LIST_0][j][i].ref_id; + memcpy(&p_motion->motion[LIST_1][j][i].mv, 
&p_frm_motion->motion[LIST_1][j][i].mv, sizeof(MotionVector)); + p_motion->motion[LIST_1][j][i].ref_idx= p_frm_motion->motion[LIST_1][j][i].ref_idx; + p_motion->motion[LIST_1][j][i].ref_pic_id= p_frm_motion->motion[LIST_1][j][i].ref_id; + } + } + if (currSlice->direct_spatial_mv_pred_flag == 1) + { + if (p->is_long_term) + { + for (j=0 ; j < fs_size_y4; j++) + { + for (i=0 ; i < fs_size_x4; i++) + { + + p_motion->moving_block[j][i]= 1; + } + } + } + else + { + for (j=0 ; j < fs_size_y4; j++) + { + for (i=0 ; i < fs_size_x4; i++) + { + p_motion->moving_block[j][i]= + !((((p_motion->motion[LIST_0][j][i].ref_idx == 0) + && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0) + && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0))) + || ((p_motion->motion[LIST_0][j][i].ref_idx == -1) + && (p_motion->motion[LIST_1][j][i].ref_idx == 0) + && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0) + && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0))); + } + } + } + } + } + + if (currSlice->direct_spatial_mv_pred_flag == 0) + { + for (j = 0; j < 2;j += 2) + { + for (i=0; i<p_Vid->listXsize[j];i++) + { + int prescale, iTRb, iTRp; + + if (j==0) + { + iTRb = iClip3( -128, 127, p_Vid->dec_picture->poc - listX[LIST_0 + j][i]->poc ); + } + else if (j == 2) + { + iTRb = iClip3( -128, 127, p_Vid->dec_picture->top_poc - listX[LIST_0 + j][i]->poc ); + } + else + { + iTRb = iClip3( -128, 127, p_Vid->dec_picture->bottom_poc - listX[LIST_0 + j][i]->poc ); + } + + iTRp = iClip3( -128, 127, listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc); + + if (iTRp!=0) + { + prescale = ( 16384 + iabs( iTRp / 2 ) ) / iTRp; + currSlice->mvscale[j][i] = iClip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ; + } + else + { + currSlice->mvscale[j][i] = 9999; + } + } + } + } +} + +/*! 
 ************************************************************************
 * \brief
 *    Compute co-located motion info (for interlace support)
 *
 *    The co-located picture is taken from listX[LIST_1][0] (and, for
 *    MBAFF slices, from the field lists listX[LIST_1 + 2] and
 *    listX[LIST_1 + 4]). Its motion data is copied into p->frame and,
 *    for MBAFF, into p->top / p->bottom. For spatial direct prediction
 *    the moving_block maps are derived; for temporal direct prediction
 *    vertical vectors are rescaled between frame and field units and
 *    currSlice->mvscale is computed.
 *
 *    Several steps rewrite p->frame in place, so the order of the
 *    sections below matters.
 *
 * \param currSlice
 *    current slice (direct mode and MBAFF flags in, mvscale out)
 * \param p
 *    co-located parameter set to fill
 * \param listX
 *    reference picture lists of the current slice
 ************************************************************************
 */
void compute_colocated_frames_mbs (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6])
{
  StorablePicture *fs = listX[LIST_1 ][0];
  StorablePicture *fs_top = fs, *fs_bottom = fs;
  int i,j, ii, jj, jdiv;
  // sizes in 4x4 block units, taken from the picture initially in listX[LIST_1][0]
  int fs_size_x4 = (fs->size_x >> 2);
  int fs_size_y4 = (fs->size_y >> 2);
  MotionParams *p_motion = &p->frame;
  PicMotionParams *p_frm_motion = &fs->motion;
  VideoParameters *p_Vid = currSlice->p_Vid;
  seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;


  // ---- select the co-located picture(s) ----
  if (currSlice->mb_aff_frame_flag)
  {
    // MBAFF: separate co-located pictures from the top and bottom field lists
    fs_top = listX[LIST_1 + 2][0];
    fs_bottom = listX[LIST_1 + 4][0];
  }
  else
  {
    if (p_Vid->field_pic_flag)
    {
      // field picture whose co-located picture has a different structure
      // and belongs to a coded frame: switch to the field of the
      // current parity
      if ((p_Vid->structure != fs->structure) && (fs->coded_frame))
      {
        if (p_Vid->structure==TOP_FIELD)
        {
          fs_top = fs_bottom = fs = listX[LIST_1 ][0]->top_field;
        }
        else
        {
          fs_top = fs_bottom = fs = listX[LIST_1 ][0]->bottom_field;
        }
      }
      p_frm_motion = &fs->motion;
    }
  }

  // ---- copy the co-located motion into p->frame ----
  if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
  {
    if (!currSlice->mb_aff_frame_flag)
    {
      int k;

      // plain copy of both lists
      for (k = LIST_0; k<=LIST_1; k++)
      {
        for (j = 0; j < (fs->size_y>>2); j++)
        {
          for (i=0;i<fs_size_x4;i++)
          {

            memcpy(&p_motion->motion[k][j][i].mv, &p_frm_motion->motion[k][j][i].mv, sizeof(MotionVector));
            p_motion->motion[k][j][i].ref_idx=p_frm_motion->motion[k][j][i].ref_idx;
            p_motion->motion[k][j][i].ref_pic_id=p_frm_motion->motion[k][j][i].ref_id;
          }
        }
      }
      p->is_long_term = fs->is_long_term;
    }
    else
    {
      for (j=0 ; j < (fs->size_y>>2); j++)
      {
        jdiv = (j>>1);                // row inside the field motion arrays
        jj = jdiv + ((j>>3)<<2);      // frame row the ref_id is read from (jj + 4 for the bottom field)
        for (i=0 ; i < fs_size_x4 ; i++)
        {
          if (p_frm_motion->field_frame[j][i])
          {
            //! Assign frame buffers for field MBs
            //! Check whether we should use top or bottom field mvs.
            //! Depending on the assigned poc values.

            if (iabs(p_Vid->dec_picture->poc - fs_bottom->poc)> iabs(p_Vid->dec_picture->poc -fs_top->poc) )
            {
              // top field is closer in POC to the current picture
              p_motion->motion[LIST_0][j][i].mv[0] = fs_top->motion.motion[LIST_0][jdiv][i].mv[0];
              p_motion->motion[LIST_0][j][i].mv[1] = fs_top->motion.motion[LIST_0][jdiv][i].mv[1];
              p_motion->motion[LIST_1][j][i].mv[0] = fs_top->motion.motion[LIST_1][jdiv][i].mv[0];
              p_motion->motion[LIST_1][j][i].mv[1] = fs_top->motion.motion[LIST_1][jdiv][i].mv[1];
              p_motion->motion[LIST_0][j][i].ref_idx = fs_top->motion.motion[LIST_0][jdiv][i].ref_idx;
              p_motion->motion[LIST_1][j][i].ref_idx = fs_top->motion.motion[LIST_1][jdiv][i].ref_idx;
              p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj][i].ref_id;
              p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj][i].ref_id;

              p->is_long_term = fs_top->is_long_term;
            }
            else
            {
              // bottom field is at least as close
              p_motion->motion[LIST_0][j][i].mv[0] = fs_bottom->motion.motion[LIST_0][jdiv][i].mv[0];
              p_motion->motion[LIST_0][j][i].mv[1] = fs_bottom->motion.motion[LIST_0][jdiv][i].mv[1];
              p_motion->motion[LIST_1][j][i].mv[0] = fs_bottom->motion.motion[LIST_1][jdiv][i].mv[0];
              p_motion->motion[LIST_1][j][i].mv[1] = fs_bottom->motion.motion[LIST_1][jdiv][i].mv[1];
              p_motion->motion[LIST_0][j][i].ref_idx = fs_bottom->motion.motion[LIST_0][jdiv][i].ref_idx;
              p_motion->motion[LIST_1][j][i].ref_idx = fs_bottom->motion.motion[LIST_1][jdiv][i].ref_idx;
              p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj + 4][i].ref_id;
              p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj + 4][i].ref_id;

              p->is_long_term = fs_bottom->is_long_term;
            }
          }
          else
          {
            // frame coded block: take the frame motion unchanged
            p_motion->motion[LIST_0][j][i].mv[0] = p_frm_motion->motion[LIST_0][j][i].mv[0];
            p_motion->motion[LIST_0][j][i].mv[1] = p_frm_motion->motion[LIST_0][j][i].mv[1] ;
            p_motion->motion[LIST_1][j][i].mv[0] = p_frm_motion->motion[LIST_1][j][i].mv[0];
            p_motion->motion[LIST_1][j][i].mv[1] = p_frm_motion->motion[LIST_1][j][i].mv[1] ;
            p_motion->motion[LIST_0][j][i].ref_idx = p_frm_motion->motion[LIST_0][j][i].ref_idx;
            p_motion->motion[LIST_1][j][i].ref_idx = p_frm_motion->motion[LIST_1][j][i].ref_idx;
            p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][j][i].ref_id;
            p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][j][i].ref_id;

            p->is_long_term = fs->is_long_term;
          }
        }
      }
    }
  }

  //! Generate field MVs from Frame MVs
  if (p_Vid->structure || currSlice->mb_aff_frame_flag)
  {
    for (j = 0; j < fs->size_y >> 3; j++)
    {
      jj = RSD(j);
      for (i = 0 ; i < fs->size_x >> 2; i++)
      {
        ii = RSD(i);
        //! Do nothing if macroblock is field coded in MB-AFF
        if (!currSlice->mb_aff_frame_flag )
        {
          // field picture: fill p->frame from the RSD() position
          p_motion->motion[LIST_0][j][i].mv[0] = p_frm_motion->motion[LIST_0][jj][ii].mv[0];
          p_motion->motion[LIST_0][j][i].mv[1] = p_frm_motion->motion[LIST_0][jj][ii].mv[1];
          p_motion->motion[LIST_1][j][i].mv[0] = p_frm_motion->motion[LIST_1][jj][ii].mv[0];
          p_motion->motion[LIST_1][j][i].mv[1] = p_frm_motion->motion[LIST_1][jj][ii].mv[1];

          // Scaling of references is done here since it will not affect spatial direct (2*0 =0)

          if (p_frm_motion->motion[LIST_0][jj][ii].ref_idx == -1)
          {
            p_motion->motion[LIST_0][j][i].ref_idx = -1;
            p_motion->motion[LIST_0][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ?
          }
          else
          {
            p_motion->motion[LIST_0][j][i].ref_idx = p_frm_motion->motion[LIST_0][jj][ii].ref_idx ;
            p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion [LIST_0][jj][ii].ref_id;
          }

          if (p_frm_motion->motion[LIST_1][jj][ii].ref_idx == -1)
          {
            p_motion->motion[LIST_1][j][i].ref_idx = -1;
            p_motion->motion[LIST_1][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ?
          }
          else
          {
            p_motion->motion[LIST_1][j][i].ref_idx = p_frm_motion->motion[LIST_1][jj][ii].ref_idx;
            p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion [LIST_1][jj][ii].ref_id;
          }

          p->is_long_term = fs->is_long_term;

          if (currSlice->direct_spatial_mv_pred_flag == 1)
          {
            // static (0) when list 0 has zero motion on a short-term
            // picture, or list 0 is unused and list 1 has zero motion
            p_motion->moving_block[j][i] =
              !((!p->is_long_term
              && ((p_motion->motion[LIST_0][j][i].ref_idx == 0)
              && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0)
              && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0)))
              || ((p_motion->motion[LIST_0][j][i].ref_idx == -1)
              && (p_motion->motion[LIST_1][j][i].ref_idx == 0)
              && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0)
              && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0)));
          }
        }
        else
        {
          // MBAFF: fill the separate bottom and top field sets
          p->bottom.motion[LIST_0][j][i].mv[0] = fs_bottom->motion.motion[LIST_0][jj][ii].mv[0];
          p->bottom.motion[LIST_0][j][i].mv[1] = fs_bottom->motion.motion[LIST_0][jj][ii].mv[1];
          p->bottom.motion[LIST_1][j][i].mv[0] = fs_bottom->motion.motion[LIST_1][jj][ii].mv[0];
          p->bottom.motion[LIST_1][j][i].mv[1] = fs_bottom->motion.motion[LIST_1][jj][ii].mv[1];
          p->bottom.motion[LIST_0][j][i].ref_idx = fs_bottom->motion.motion[LIST_0][jj][ii].ref_idx;
          p->bottom.motion[LIST_1][j][i].ref_idx = fs_bottom->motion.motion[LIST_1][jj][ii].ref_idx;
          p->bottom.motion[LIST_0][j][i].ref_pic_id = fs_bottom->motion.motion[LIST_0][jj][ii].ref_id;
          p->bottom.motion[LIST_1][j][i].ref_pic_id = fs_bottom->motion.motion[LIST_1][jj][ii].ref_id;

          if (currSlice->direct_spatial_mv_pred_flag == 1)
          {
            p->bottom.moving_block[j][i] =
              !((!fs_bottom->is_long_term
              && ((p->bottom.motion[LIST_0][j][i].ref_idx == 0)
              && (iabs(p->bottom.motion[LIST_0][j][i].mv[0])>>1 == 0)
              && (iabs(p->bottom.motion[LIST_0][j][i].mv[1])>>1 == 0)))
              || ((p->bottom.motion[LIST_0][j][i].ref_idx == -1)
              && (p->bottom.motion[LIST_1][j][i].ref_idx == 0)
              && (iabs(p->bottom.motion[LIST_1][j][i].mv[0])>>1 == 0)
              && (iabs(p->bottom.motion[LIST_1][j][i].mv[1])>>1 == 0)));
          }

          p->top.motion[LIST_0][j][i].mv[0] = fs_top->motion.motion[LIST_0][jj][ii].mv[0];
          p->top.motion[LIST_0][j][i].mv[1] = fs_top->motion.motion[LIST_0][jj][ii].mv[1];
          p->top.motion[LIST_1][j][i].mv[0] = fs_top->motion.motion[LIST_1][jj][ii].mv[0];
          p->top.motion[LIST_1][j][i].mv[1] = fs_top->motion.motion[LIST_1][jj][ii].mv[1];
          p->top.motion[LIST_0][j][i].ref_idx = fs_top->motion.motion[LIST_0][jj][ii].ref_idx;
          p->top.motion[LIST_1][j][i].ref_idx = fs_top->motion.motion[LIST_1][jj][ii].ref_idx;
          p->top.motion[LIST_0][j][i].ref_pic_id = fs_top->motion.motion[LIST_0][jj][ii].ref_id;
          p->top.motion[LIST_1][j][i].ref_pic_id = fs_top->motion.motion[LIST_1][jj][ii].ref_id;

          if (currSlice->direct_spatial_mv_pred_flag == 1)
          {
            p->top.moving_block[j][i] =
              !((!fs_top->is_long_term
              && ((p->top.motion[LIST_0][j][i].ref_idx == 0)
              && (iabs(p->top.motion[LIST_0][j][i].mv[0])>>1 == 0)
              && (iabs(p->top.motion[LIST_0][j][i].mv[1])>>1 == 0)))
              || ((p->top.motion[LIST_0][j][i].ref_idx == -1)
              && (p->top.motion[LIST_1][j][i].ref_idx == 0)
              && (iabs(p->top.motion[LIST_1][j][i].mv[0])>>1 == 0)
              && (iabs(p->top.motion[LIST_1][j][i].mv[1])>>1 == 0)));
          }

          // temporal direct: halve vertical vectors of frame coded
          // blocks to get field units
          if ((currSlice->direct_spatial_mv_pred_flag == 0 ) && !p_frm_motion->field_frame[2*j][i])
          {
            p->top.motion[LIST_0][j][i].mv[1] /= 2;
            p->top.motion[LIST_1][j][i].mv[1] /= 2;
            p->bottom.motion[LIST_0][j][i].mv[1] /= 2;
            p->bottom.motion[LIST_1][j][i].mv[1] /= 2;
          }
        }
      }
    }
  }

  if (!active_sps->frame_mbs_only_flag)
  {
    //! Use inference flag to remap mvs/references
    //! Frame with field co-located
    if (!p_Vid->structure)
    {
      for (j=0 ; j < fs_size_y4; j++)
      {
        jdiv = (j >> 1);
        jj = jdiv + ((j >> 3) << 2);
        for (i = 0 ; i < fs_size_x4; i++)
        {
          if (p_frm_motion->field_frame[j][i])
          {
            // take the data of the field that is closer in POC
            if (iabs(p_Vid->dec_picture->poc - fs->bottom_field->poc) > iabs(p_Vid->dec_picture->poc - fs->top_field->poc))
            {
              p_motion->motion[LIST_0][j][i].mv[0] = fs->top_field->motion.motion[LIST_0][jdiv][i].mv[0];
              p_motion->motion[LIST_0][j][i].mv[1] = fs->top_field->motion.motion[LIST_0][jdiv][i].mv[1] ;
              p_motion->motion[LIST_1][j][i].mv[0] = fs->top_field->motion.motion[LIST_1][jdiv][i].mv[0];
              p_motion->motion[LIST_1][j][i].mv[1] = fs->top_field->motion.motion[LIST_1][jdiv][i].mv[1] ;

              p_motion->motion[LIST_0][j][i].ref_idx = fs->top_field->motion.motion[LIST_0][jdiv][i].ref_idx;
              p_motion->motion[LIST_1][j][i].ref_idx = fs->top_field->motion.motion[LIST_1][jdiv][i].ref_idx;
              p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj][i].ref_id;
              p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj][i].ref_id;
              p->is_long_term = fs->top_field->is_long_term;
            }
            else
            {
              p_motion->motion[LIST_0][j][i].mv[0] = fs->bottom_field->motion.motion[LIST_0][jdiv][i].mv[0];
              p_motion->motion[LIST_0][j][i].mv[1] = fs->bottom_field->motion.motion[LIST_0][jdiv][i].mv[1] ;
              p_motion->motion[LIST_1][j][i].mv[0] = fs->bottom_field->motion.motion[LIST_1][jdiv][i].mv[0];
              p_motion->motion[LIST_1][j][i].mv[1] = fs->bottom_field->motion.motion[LIST_1][jdiv][i].mv[1] ;

              p_motion->motion[LIST_0][j][i].ref_idx = fs->bottom_field->motion.motion[LIST_0][jdiv][i].ref_idx;
              p_motion->motion[LIST_1][j][i].ref_idx = fs->bottom_field->motion.motion[LIST_1][jdiv][i].ref_idx;
              p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj + 4][i].ref_id;
              p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj + 4][i].ref_id;
              p->is_long_term = fs->bottom_field->is_long_term;
            }
          }
        }
      }
    }
  }

  // overrides any per-block value assigned to p->is_long_term above
  p->is_long_term = fs->is_long_term;

  if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
  {
    // every block takes the data of its RSD() position; p->frame is
    // rewritten in place
    if (currSlice->direct_spatial_mv_pred_flag == 1)
    {
      for (j=0 ; j < (fs->size_y>>2); j++)
      {
        jj = RSD(j);
        for (i=0 ; i < (fs->size_x>>2); i++)
        {
          ii = RSD(i);

          p_motion->motion[LIST_0][j][i] = p_motion->motion[LIST_0][jj][ii];
          p_motion->motion[LIST_1][j][i] = p_motion->motion[LIST_1][jj][ii];

          p_motion->moving_block[j][i]= (byte) (
            !((!p->is_long_term
            && ((p_motion->motion[LIST_0][j][i].ref_idx == 0)
            && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0)
            && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0)))
            || ((p_motion->motion[LIST_0][j][i].ref_idx == -1)
            && (p_motion->motion[LIST_1][j][i].ref_idx == 0)
            && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0)
            && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0))));
        }
      }
    }
    else
    {
      for (j=0 ; j < (fs->size_y>>2); j++)
      {
        jj = RSD(j);
        for (i=0 ; i < (fs->size_x>>2); i++)
        {
          ii = RSD(i);
          p_motion->motion[LIST_0][j][i]=p_motion->motion[LIST_0][jj][ii];
          p_motion->motion[LIST_1][j][i]=p_motion->motion[LIST_1][jj][ii];
        }
      }
    }
  }
  else
  {
    // frame-only stream without 8x8 inference: plain per-block copy
    for (j=0 ; j < fs_size_y4; j++)
    {
      for (i=0 ; i < fs_size_x4; i++)
      {
        memcpy(&p_motion->motion[LIST_0][j][i].mv, &p_frm_motion->motion[LIST_0][j][i].mv, sizeof(MotionVector));
        p_motion->motion[LIST_0][j][i].ref_idx= p_frm_motion->motion[LIST_0][j][i].ref_idx;
        p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion [LIST_0][j][i].ref_id;
        memcpy(&p_motion->motion[LIST_1][j][i].mv, &p_frm_motion->motion[LIST_1][j][i].mv, sizeof(MotionVector));
        p_motion->motion[LIST_1][j][i].ref_idx= p_frm_motion->motion[LIST_1][j][i].ref_idx;
        p_motion->motion[LIST_1][j][i].ref_pic_id= p_frm_motion->motion [LIST_1][j][i].ref_id;
      }
    }

    if (currSlice->direct_spatial_mv_pred_flag == 1)
    {
      for (j=0 ; j < fs_size_y4; j++)
      {
        for (i=0 ; i < fs_size_x4; i++)
        {
          p_motion->moving_block[j][i]=
            !((!p->is_long_term
            && ((p_motion->motion[LIST_0][j][i].ref_idx == 0)
            && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0)
            && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0)))
            || ((p_motion->motion[LIST_0][j][i].ref_idx == -1)
            && (p_motion->motion[LIST_1][j][i].ref_idx == 0)
            && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0)
            && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0)));
        }
      }
    }
  }

  // ---- temporal direct only: vector rescaling and POC scale factors ----
  if (currSlice->direct_spatial_mv_pred_flag == 0)
  {
    if (currSlice->mb_aff_frame_flag || !p_Vid->structure)
    {
      // frame decoding: field coded co-located blocks get their
      // vertical vectors doubled
      for (j=0 ; j < fs_size_y4; j++)
      {
        for (i=0 ; i < fs_size_x4 ; i++)
        {
          if (p_frm_motion->field_frame[j][i])
          {
            p_motion->motion[LIST_0][j][i].mv[1] *= 2;
            p_motion->motion[LIST_1][j][i].mv[1] *= 2;
          }
        }
      }
    }
    else if (p_Vid->structure)
    {
      // field decoding: frame coded co-located blocks get their
      // vertical vectors halved
      for (j=0 ; j < fs_size_y4; j++)
      {
        for (i=0 ; i < fs_size_x4 ; i++)
        {
          if (!p_frm_motion->field_frame[j][i])
          {
            p_motion->motion[LIST_0][j][i].mv[1] /= 2;
            p_motion->motion[LIST_1][j][i].mv[1] /= 2;
          }
        }
      }
    }

    // j selects the list pair: 0 = current picture lists; with MBAFF
    // also 2 (scaled against top_poc) and 4 (scaled against bottom_poc)
    for (j=0; j<2 + (currSlice->mb_aff_frame_flag * 4);j+=2)
    {
      for (i=0; i<p_Vid->listXsize[j];i++)
      {
        int prescale, iTRb, iTRp;

        if (j==0)
        {
          iTRb = iClip3( -128, 127, p_Vid->dec_picture->poc - listX[LIST_0 + j][i]->poc );
        }
        else if (j == 2)
        {
          iTRb = iClip3( -128, 127, p_Vid->dec_picture->top_poc - listX[LIST_0 + j][i]->poc );
        }
        else
        {
          iTRb = iClip3( -128, 127, p_Vid->dec_picture->bottom_poc - listX[LIST_0 + j][i]->poc );
        }

        iTRp = iClip3( -128, 127, listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc);

        if (iTRp!=0)
        {
          prescale = ( 16384 + iabs( iTRp / 2 ) ) / iTRp;
          currSlice->mvscale[j][i] = iClip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ;
        }
        else
        {
          // same POC distance: marker value instead of a scale factor
          currSlice->mvscale[j][i] = 9999;
        }
      }
    }
  }
}


/*!
+ ************************************************************************ + * \brief + * Compute co-located motion info + * for 4:4:4 Independent mode + * + * \param currSlice + * slice being decoded; currSlice->mvscale is filled when + * direct_spatial_mv_pred_flag is 0 + * \param p + * co-located parameters to fill (frame/top/bottom motion, + * moving_block, is_long_term) + * \param listX + * reference lists; the co-located picture is listX[LIST_1][0], or + * listX[LIST_1 + 2][0] (top) and listX[LIST_1 + 4][0] (bottom) when + * mb_aff_frame_flag is set + * + * Motion data is read from the JVmotion[] entry selected by + * p_Vid->colour_plane_id. + * + ************************************************************************ + */ + +void compute_colocated_JV(Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + StorablePicture *fs, *fs_top, *fs_bottom; + int i,j, ii, jj, jdiv; + int np = p_Vid->colour_plane_id; /* plane index used for every JVmotion[] access below */ + + fs_top = fs_bottom = fs = listX[LIST_1 ][0]; + + if (currSlice->mb_aff_frame_flag) + { + fs_top= listX[LIST_1 + 2][0]; + fs_bottom= listX[LIST_1 + 4][0]; + } + else + { + if (p_Vid->field_pic_flag) + { + if ((p_Vid->structure != fs->structure) && (fs->coded_frame)) + { + if (p_Vid->structure==TOP_FIELD) + { + fs_top=fs_bottom=fs = listX[LIST_1 ][0]->top_field; + } + else + { + fs_top=fs_bottom=fs = listX[LIST_1 ][0]->bottom_field; + } + } + } + } + + if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag) + { + for (j=0 ; j<fs->size_y/4 ; j++) + { + jdiv = j/2; + jj = j/2 + 4 * (j/8); + for (i=0 ; i<fs->size_x/4 ; i++) + { + + if (currSlice->mb_aff_frame_flag && fs->motion.field_frame[j][i]) + { + //! Assign frame buffers for field MBs + //! Check whether we should use top or bottom field mvs. + //! Depending on the assigned poc values. 
+ + if (iabs(p_Vid->dec_picture->poc - fs_bottom->poc)> iabs(p_Vid->dec_picture->poc -fs_top->poc) ) + { + p->frame.motion[LIST_0][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_0][jdiv][i].mv[0]; + p->frame.motion[LIST_0][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ; + p->frame.motion[LIST_1][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_1][jdiv][i].mv[0]; + p->frame.motion[LIST_1][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ; + p->frame.motion[LIST_0][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx; + p->frame.motion[LIST_1][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx; + p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj][i].ref_id; + p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj][i].ref_id; + + p->is_long_term = fs_top->is_long_term; + } + else + { + p->frame.motion[LIST_0][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_0][jdiv][i].mv[0]; + p->frame.motion[LIST_0][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ; + p->frame.motion[LIST_1][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_1][jdiv][i].mv[0]; + p->frame.motion[LIST_1][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ; + p->frame.motion[LIST_0][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx; + p->frame.motion[LIST_1][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx; + p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj + 4][i].ref_id; + p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj + 4][i].ref_id; + + p->is_long_term = fs_bottom->is_long_term; + } + } + else + { + p->frame.motion[LIST_0][j][i].mv[0] = fs->JVmotion[np].motion[LIST_0][j][i].mv[0]; + p->frame.motion[LIST_0][j][i].mv[1] = fs->JVmotion[np].motion[LIST_0][j][i].mv[1] ; + p->frame.motion[LIST_1][j][i].mv[0] = 
fs->JVmotion[np].motion[LIST_1][j][i].mv[0]; + p->frame.motion[LIST_1][j][i].mv[1] = fs->JVmotion[np].motion[LIST_1][j][i].mv[1] ; + p->frame.motion[LIST_0][j][i].ref_idx = fs->JVmotion[np].motion[LIST_0][j][i].ref_idx; + p->frame.motion[LIST_1][j][i].ref_idx = fs->JVmotion[np].motion[LIST_1][j][i].ref_idx; + p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][j][i].ref_id; + p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][j][i].ref_id; + + p->is_long_term = fs->is_long_term; + } + } + } + } + + + //! Generate field MVs from Frame MVs + if (p_Vid->structure || currSlice->mb_aff_frame_flag) + { + for (j=0 ; j<fs->size_y/8 ; j++) + { + jj = RSD(j); + for (i=0 ; i<fs->size_x/4 ; i++) + { + ii = RSD(i); + //! Do nothing if macroblock as field coded in MB-AFF + if (!currSlice->mb_aff_frame_flag ) + { + p->frame.motion[LIST_0][j][i].mv[0] = fs->JVmotion[np].motion[LIST_0][jj][ii].mv[0]; + p->frame.motion[LIST_0][j][i].mv[1] = fs->JVmotion[np].motion[LIST_0][jj][ii].mv[1]; + p->frame.motion[LIST_1][j][i].mv[0] = fs->JVmotion[np].motion[LIST_1][jj][ii].mv[0]; + p->frame.motion[LIST_1][j][i].mv[1] = fs->JVmotion[np].motion[LIST_1][jj][ii].mv[1]; + + // Scaling of references is done here since it will not affect spatial direct (2*0 =0) + + if (fs->JVmotion[np].motion[LIST_0][jj][ii].ref_idx == -1) + { + p->frame.motion[LIST_0][j][i].ref_idx = -1; + p->frame.motion[LIST_0][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ? + } + else + { + p->frame.motion[LIST_0][j][i].ref_idx = fs->JVmotion[np].motion[LIST_0][jj][ii].ref_idx; + p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj][ii].ref_id; + } + + if (fs->JVmotion[np].motion[LIST_1][jj][ii].ref_idx == -1) + { + p->frame.motion[LIST_1][j][i].ref_idx = -1; + p->frame.motion[LIST_1][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ? 
+ } + else + { + p->frame.motion[LIST_1][j][i].ref_idx = fs->JVmotion[np].motion[LIST_1][jj][ii].ref_idx; + p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj][ii].ref_id; + } + + p->is_long_term = fs->is_long_term; + + if (currSlice->direct_spatial_mv_pred_flag == 1) + { + p->frame.moving_block[j][i] = + !((!p->is_long_term + && ((p->frame.motion[LIST_0][j][i].ref_idx == 0) + && (iabs(p->frame.motion[LIST_0][j][i].mv[0])>>1 == 0) + && (iabs(p->frame.motion[LIST_0][j][i].mv[1])>>1 == 0))) + || ((p->frame.motion[LIST_0][j][i].ref_idx == -1) + && (p->frame.motion[LIST_1][j][i].ref_idx == 0) + && (iabs(p->frame.motion[LIST_1][j][i].mv[0])>>1 == 0) + && (iabs(p->frame.motion[LIST_1][j][i].mv[1])>>1 == 0))); + } + } + else + { + p->bottom.motion[LIST_0][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].mv[0]; + p->bottom.motion[LIST_0][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].mv[1]; + p->bottom.motion[LIST_1][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].mv[0]; + p->bottom.motion[LIST_1][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].mv[1]; + p->bottom.motion[LIST_0][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].ref_idx; + p->bottom.motion[LIST_1][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].ref_idx; + p->bottom.motion[LIST_0][j][i].ref_pic_id = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].ref_id; + p->bottom.motion[LIST_1][j][i].ref_pic_id = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].ref_id; + + if (currSlice->direct_spatial_mv_pred_flag == 1) + { + p->bottom.moving_block[j][i] = + !((!fs_bottom->is_long_term + && ((p->bottom.motion[LIST_0][j][i].ref_idx == 0) + && (iabs(p->bottom.motion[LIST_0][j][i].mv[0])>>1 == 0) + && (iabs(p->bottom.motion[LIST_0][j][i].mv[1])>>1 == 0))) + || ((p->bottom.motion[LIST_0][j][i].ref_idx == -1) + && (p->bottom.motion[LIST_1][j][i].ref_idx == 0) + && (iabs(p->bottom.motion[LIST_1][j][i].mv[0])>>1 == 0) + && 
(iabs(p->bottom.motion[LIST_1][j][i].mv[1])>>1 == 0))); + } + + p->top.motion[LIST_0][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_0][jj][ii].mv[0]; + p->top.motion[LIST_0][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_0][jj][ii].mv[1]; + p->top.motion[LIST_1][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_1][jj][ii].mv[0]; + p->top.motion[LIST_1][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_1][jj][ii].mv[1]; + p->top.motion[LIST_0][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_0][jj][ii].ref_idx; + p->top.motion[LIST_1][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_1][jj][ii].ref_idx; + p->top.motion[LIST_0][j][i].ref_pic_id = fs_top->JVmotion[np].motion[LIST_0][jj][ii].ref_id; + p->top.motion[LIST_1][j][i].ref_pic_id = fs_top->JVmotion[np].motion[LIST_1][jj][ii].ref_id; + + if (currSlice->direct_spatial_mv_pred_flag == 1) + { + p->top.moving_block[j][i] = + !((!fs_top->is_long_term + && ((p->top.motion[LIST_0][j][i].ref_idx == 0) + && (iabs(p->top.motion[LIST_0][j][i].mv[0])>>1 == 0) + && (iabs(p->top.motion[LIST_0][j][i].mv[1])>>1 == 0))) + || ((p->top.motion[LIST_0][j][i].ref_idx == -1) + && (p->top.motion[LIST_1][j][i].ref_idx == 0) + && (iabs(p->top.motion[LIST_1][j][i].mv[0])>>1 == 0) + && (iabs(p->top.motion[LIST_1][j][i].mv[1])>>1 == 0))); + } + + if ((currSlice->direct_spatial_mv_pred_flag == 0 ) && !fs->motion.field_frame[2*j][i]) + { + p->top.motion[LIST_0][j][i].mv[1] /= 2; + p->top.motion[LIST_1][j][i].mv[1] /= 2; + p->bottom.motion[LIST_0][j][i].mv[1] /= 2; + p->bottom.motion[LIST_1][j][i].mv[1] /= 2; + } + + } + } + } + } + + + if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag) + { + //! Use inference flag to remap mvs/references + //! 
Frame with field co-located + + if (!p_Vid->structure) + { + for (j=0 ; j<fs->size_y/4 ; j++) + { + jdiv = j/2; + jj = j/2 + 4*(j/8); + for (i=0 ; i<fs->size_x/4 ; i++) + { + + if (fs->motion.field_frame[j][i]) + { + if (iabs(p_Vid->dec_picture->poc - fs->bottom_field->poc) > iabs(p_Vid->dec_picture->poc - fs->top_field->poc)) + { + p->frame.motion[LIST_0][j][i].mv[0] = fs->top_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[0]; + p->frame.motion[LIST_0][j][i].mv[1] = fs->top_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ; + p->frame.motion[LIST_1][j][i].mv[0] = fs->top_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[0]; + p->frame.motion[LIST_1][j][i].mv[1] = fs->top_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ; + + p->frame.motion[LIST_0][j][i].ref_idx = fs->top_field->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx; + p->frame.motion[LIST_1][j][i].ref_idx = fs->top_field->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx; + p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj][i].ref_id; + p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj][i].ref_id; + p->is_long_term = fs->top_field->is_long_term; + } + else + { + p->frame.motion[LIST_0][j][i].mv[0] = fs->bottom_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[0]; + p->frame.motion[LIST_0][j][i].mv[1] = fs->bottom_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ; + p->frame.motion[LIST_1][j][i].mv[0] = fs->bottom_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[0]; + p->frame.motion[LIST_1][j][i].mv[1] = fs->bottom_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ; + + p->frame.motion[LIST_0][j][i].ref_idx = fs->bottom_field->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx; + p->frame.motion[LIST_1][j][i].ref_idx = fs->bottom_field->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx; + p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj + 4][i].ref_id; + p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj + 
4][i].ref_id; + p->is_long_term = fs->bottom_field->is_long_term; + } + } + } + } + } + } + + p->is_long_term = fs->is_long_term; + + if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag) + { + for (j=0 ; j<fs->size_y/4 ; j++) + { + jj = RSD(j); + for (i=0 ; i<fs->size_x/4 ; i++) + { + ii = RSD(i); + + p->frame.motion[LIST_0][j][i] = p->frame.motion[LIST_0][jj][ii]; + p->frame.motion[LIST_1][j][i] = p->frame.motion[LIST_1][jj][ii]; + //p->frame.mv[LIST_0][j][i][0] = p->frame.mv[LIST_0][jj][ii][0]; + //p->frame.mv[LIST_0][j][i][1] = p->frame.mv[LIST_0][jj][ii][1]; + //p->frame.mv[LIST_1][j][i][0] = p->frame.mv[LIST_1][jj][ii][0]; + //p->frame.mv[LIST_1][j][i][1] = p->frame.mv[LIST_1][jj][ii][1]; + + //p->frame.ref_idx[LIST_0][j][i]=p->frame.ref_idx[LIST_0][jj][ii]; + //p->frame.ref_idx[LIST_1][j][i]=p->frame.ref_idx[LIST_1][jj][ii]; + //p->frame.ref_pic_id[LIST_0][j][i] = p->frame.ref_pic_id[LIST_0][jj][ii]; + //p->frame.ref_pic_id[LIST_1][j][i] = p->frame.ref_pic_id[LIST_1][jj][ii]; + + if (currSlice->direct_spatial_mv_pred_flag == 1) + { + p->frame.moving_block[j][i]= (byte) ( + !((!p->is_long_term + && ((p->frame.motion[LIST_0][j][i].ref_idx == 0) + && (iabs(p->frame.motion[LIST_0][j][i].mv[0])>>1 == 0) + && (iabs(p->frame.motion[LIST_0][j][i].mv[1])>>1 == 0))) + || ((p->frame.motion[LIST_0][j][i].ref_idx == -1) + && (p->frame.motion[LIST_1][j][i].ref_idx == 0) + && (iabs(p->frame.motion[LIST_1][j][i].mv[0])>>1 == 0) + && (iabs(p->frame.motion[LIST_1][j][i].mv[1])>>1 == 0)))); + } + } + } + } + else + { + for (j=0 ; j<fs->size_y/4 ; j++) + { + jj = RSD(j); + for (i=0 ; i<fs->size_x/4 ; i++) + { + ii = RSD(i); + //! 
Use inference flag to remap mvs/references + p->frame.motion[LIST_0][j][i].mv[0] = fs->JVmotion[np].motion[LIST_0][j][i].mv[0]; + p->frame.motion[LIST_0][j][i].mv[1] = fs->JVmotion[np].motion[LIST_0][j][i].mv[1]; + p->frame.motion[LIST_1][j][i].mv[0] = fs->JVmotion[np].motion[LIST_1][j][i].mv[0]; + p->frame.motion[LIST_1][j][i].mv[1] = fs->JVmotion[np].motion[LIST_1][j][i].mv[1]; + + p->frame.motion[LIST_0][j][i].ref_idx = fs->JVmotion[np].motion[LIST_0][j][i].ref_idx; + p->frame.motion[LIST_1][j][i].ref_idx = fs->JVmotion[np].motion[LIST_1][j][i].ref_idx; + p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][j][i].ref_id; + p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][j][i].ref_id; + + if (currSlice->direct_spatial_mv_pred_flag == 1) + { + p->frame.moving_block[j][i]= (byte) ( + !((!p->is_long_term + && ((p->frame.motion[LIST_0][j][i].ref_idx == 0) + && (iabs(p->frame.motion[LIST_0][j][i].mv[0])>>1 == 0) + && (iabs(p->frame.motion[LIST_0][j][i].mv[1])>>1 == 0))) + || ((p->frame.motion[LIST_0][j][i].ref_idx == -1) + && (p->frame.motion[LIST_1][j][i].ref_idx == 0) + && (iabs(p->frame.motion[LIST_1][j][i].mv[0])>>1 == 0) + && (iabs(p->frame.motion[LIST_1][j][i].mv[1])>>1 == 0)))); + } + } + } + } + + + if (currSlice->direct_spatial_mv_pred_flag == 0) + { + for (j=0 ; j<fs->size_y/4 ; j++) + { + for (i=0 ; i<fs->size_x/4 ; i++) + { + if ((!currSlice->mb_aff_frame_flag &&!p_Vid->structure && fs->motion.field_frame[j][i]) || (currSlice->mb_aff_frame_flag && fs->motion.field_frame[j][i])) + { + p->frame.motion[LIST_0][j][i].mv[1] *= 2; + p->frame.motion[LIST_1][j][i].mv[1] *= 2; + } + else if (p_Vid->structure && !fs->motion.field_frame[j][i]) + { + p->frame.motion[LIST_0][j][i].mv[1] /= 2; + p->frame.motion[LIST_1][j][i].mv[1] /= 2; + } + + } + } + + for (j=0; j<2 + (currSlice->mb_aff_frame_flag * 4);j+=2) + { + for (i=0; i<p_Vid->listXsize[j];i++) + { + int prescale, iTRb, iTRp; + + if (j==0) + { + iTRb = iClip3( 
-128, 127, p_Vid->dec_picture->poc - listX[LIST_0 + j][i]->poc ); + } + else if (j == 2) + { + iTRb = iClip3( -128, 127, p_Vid->dec_picture->top_poc - listX[LIST_0 + j][i]->poc ); + } + else + { + iTRb = iClip3( -128, 127, p_Vid->dec_picture->bottom_poc - listX[LIST_0 + j][i]->poc ); + } + + iTRp = iClip3( -128, 127, listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc); + + if (iTRp!=0) + { + prescale = ( 16384 + iabs( iTRp / 2 ) ) / iTRp; + currSlice->mvscale[j][i] = iClip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ; + } + else + { + currSlice->mvscale[j][i] = 9999; /* iTRp == 0: no scale factor can be derived, 9999 is stored instead */ + } + } + } + } +} + +/*! + ************************************************************************ + * \brief + * Copy motion data from \a motion into \a JVplane + * + * \param p_Vid + * supplies width, height and the active SPS + * \param JVplane + * destination motion parameters + * \param motion + * source motion parameters + * + * Copies md_size PicMotion entries starting at motion->motion and, + * when motion->field_references is non-NULL, 4 * md_size h264_ref_t + * entries starting at motion->field_references. + * + * NOTE(review): ref_size is computed but never used. + * NOTE(review): the first memcpy starts at the top-level motion member, + * while compute_colocated_JV indexes the same member as + * motion[list][j][i]; confirm the storage layout makes this copy valid. + * + ************************************************************************ + */ +void copy_storable_param_JV( VideoParameters *p_Vid, PicMotionParams *JVplane, PicMotionParams *motion ) +{ + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + int md_size = (p_Vid->height / BLOCK_SIZE) * (p_Vid->width / BLOCK_SIZE); + int ref_size = active_sps->frame_mbs_only_flag ? 2 * md_size : 6 * md_size; + + memcpy(JVplane->motion, motion->motion, md_size*sizeof(PicMotion)); + if (motion->field_references) + memcpy(JVplane->field_references, motion->field_references, 4*md_size*sizeof(h264_ref_t)); +} diff --git a/Src/h264dec/ldecod/src/mc_prediction.c b/Src/h264dec/ldecod/src/mc_prediction.c new file mode 100644 index 00000000..8af3aae8 --- /dev/null +++ b/Src/h264dec/ldecod/src/mc_prediction.c @@ -0,0 +1,2420 @@ + +/*! 
+************************************************************************************* +* \file mc_prediction.c +* +* \brief +* Functions for motion compensated prediction +* +* \author +* Main contributors (see contributors.h for copyright, +* address and affiliation details) +* - Alexis Michael Tourapis <alexismt@ieee.org> +* +************************************************************************************* +*/ +#include "global.h" +#include "block.h" +#include "mc_prediction.h" +#include "mbuffer.h" +#include "mb_access.h" +#include "macroblock.h" +#include "memalloc.h" +#include "optim.h" +#include <emmintrin.h> + +static const int COEF[6] = { 1, -5, 20, 20, -5, 1 }; +/*! +************************************************************************ +* \brief +* block single list prediction +************************************************************************ +*/ +static inline void mc_prediction(h264_imgpel_macroblock_t mb_pred, + int joff, + int ver_block_size, + int hor_block_size, + int ioff, + const h264_imgpel_macroblock_t block) +{ + int jj; + + if (hor_block_size == MB_BLOCK_SIZE) + { + memcpy(&(mb_pred[joff][ioff]), &(block[0][0]), hor_block_size * ver_block_size * sizeof(imgpel)); + } + else + { + h264_imgpel_macroblock_row_t *dest = (h264_imgpel_macroblock_row_t *)(mb_pred[joff]); + for(jj = 0; jj < ver_block_size; jj++) + { + memcpy(&dest[jj][ioff], &(block[jj][0]), hor_block_size * sizeof(imgpel)); + } + } +} + +/*! 
+************************************************************************ +* \brief +* block single list weighted prediction +************************************************************************ +*/ +static inline void weighted_mc_prediction(h264_imgpel_macroblock_row_t *mb_pred, + int ver_block_size, + int hor_block_size, + int wp_scale, + int wp_offset, + int weight_denom) +{ +#ifdef H264_IPP + IppiSize roi = {hor_block_size, ver_block_size}; + ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, roi); +#else + int ii, jj; + if (weight_denom > 0) + { + for(jj=0;jj<ver_block_size;jj++) + { + imgpel *row = mb_pred[jj]; + const imgpel *b0 = row; + + for(ii=0;ii<hor_block_size;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } + else + { + weight_denom = -weight_denom; + for(jj=0;jj<ver_block_size;jj++) + { + imgpel *row = mb_pred[jj]; + const imgpel *b0 = row; + + for(ii=0;ii<hor_block_size;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } +#endif +} + + +void weighted_mc_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + int jj; + + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_scale = _mm_set1_epi16(wp_scale); + __m128i xmm_offset = _mm_set1_epi16(wp_offset); + if (weight_denom > 0) + { + __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom); + __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); + + for(jj = 0; jj < 16; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale); + b0_low = _mm_add_epi16(b0_low, xmm_add); + b0_high = _mm_add_epi16(b0_high, xmm_add); + b0_low = _mm_sra_epi16(b0_low, xmm_shift); 
+ b0_high = _mm_sra_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + // (x + (1 << (a-1) )) >> a; + //row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } + else + { + + __m128i xmm_shift = _mm_cvtsi32_si128(-weight_denom); + for(jj = 0; jj < 16; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale); + b0_low = _mm_sll_epi16(b0_low, xmm_shift); + b0_high = _mm_sll_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + //(x << a); + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } +} + +#ifdef H264_IPP +void weighted_mc_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + IppiSize roi = {16, 16}; + ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, roi); +} +#endif + +void weighted_mc_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + int ii, jj; + if (weight_denom > 0) + { + for(jj=0;jj<16;jj++) + { + imgpel *row = mb_pred[jj]; + const imgpel *b0 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } + else + { + weight_denom = -weight_denom; + for(jj=0;jj<16;jj++) + { + imgpel *row = mb_pred[jj]; + 
const imgpel *b0 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } +} + + +/* 16x8 */ +void weighted_mc_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + int jj; + + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_scale = _mm_set1_epi16(wp_scale); + __m128i xmm_offset = _mm_set1_epi16(wp_offset); + if (weight_denom > 0) + { + __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom); + __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); + + for(jj = 0; jj < 8; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale); + b0_low = _mm_add_epi16(b0_low, xmm_add); + b0_high = _mm_add_epi16(b0_high, xmm_add); + b0_low = _mm_sra_epi16(b0_low, xmm_shift); + b0_high = _mm_sra_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + // (x + (1 << (a-1) )) >> a; + //row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } + else + { + + __m128i xmm_shift = _mm_cvtsi32_si128(-weight_denom); + for(jj = 0; jj < 8; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale); + b0_low = _mm_sll_epi16(b0_low, xmm_shift); + b0_high = _mm_sll_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = 
_mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + //(x << a); + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } +} + +#ifdef H264_IPP +void weighted_mc_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + IppiSize roi = {16, 8}; + ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, roi); +} +#endif + +void weighted_mc_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + int ii, jj; + if (weight_denom > 0) + { + for(jj=0;jj<8;jj++) + { + imgpel *row = mb_pred[jj]; + const imgpel *b0 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } + else + { + weight_denom = -weight_denom; + for(jj=0;jj<8;jj++) + { + imgpel *row = mb_pred[jj]; + const imgpel *b0 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } +} + + + + +#define LOAD_LINE_EPI16(reg, ptr) { reg = _mm_loadl_epi64((__m128i *)(ptr)); reg = _mm_unpacklo_epi8(reg, xmm_zero); } +void weighted_mc_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + int jj; + + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_scale = _mm_set1_epi16(wp_scale); + __m128i xmm_offset = _mm_set1_epi16(wp_offset); + if (weight_denom > 0) + { + __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom); + __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); + + for(jj = 0; jj < 8; jj++) + { + __m128i b0; + LOAD_LINE_EPI16(b0, (__m128i *)mb_pred[jj]); + b0 = _mm_mullo_epi16(b0, xmm_scale); + b0 = _mm_add_epi16(b0, xmm_add); + b0 = _mm_sra_epi16(b0, xmm_shift); + b0 = _mm_add_epi16(b0, xmm_offset); + + b0 = 
_mm_packus_epi16(b0, b0); // convert back to epi8 + _mm_storel_epi64((__m128i *)mb_pred[jj], b0); + // (x + (1 << (a-1) )) >> a; + //row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } + else + { + + __m128i xmm_shift = _mm_cvtsi32_si128(-weight_denom); + for(jj = 0; jj < 8; jj++) + { + __m128i b0; + LOAD_LINE_EPI16(b0, (__m128i *)mb_pred[jj]); + b0 = _mm_mullo_epi16(b0, xmm_scale); + b0 = _mm_sll_epi16(b0, xmm_shift); + b0 = _mm_add_epi16(b0, xmm_offset); + + b0 = _mm_packus_epi16(b0, b0); // convert back to epi8 + _mm_storel_epi64((__m128i *)mb_pred[jj], b0); + //(x << a); + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } +} + +#ifdef H264_IPP +void weighted_mc_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + IppiSize roi = {8, 8}; + ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, roi); +} +#endif + +void weighted_mc_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom) +{ + int ii, jj; + if (weight_denom > 0) + { + for(jj=0;jj<8;jj++) + { + imgpel *row = mb_pred[jj]; + const imgpel *b0 = row; + + for(ii=0;ii<8;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } + else + { + weight_denom = -weight_denom; + for(jj=0;jj<8;jj++) + { + imgpel *row = mb_pred[jj]; + const imgpel *b0 = row; + + for(ii=0;ii<8;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * b0[ii]), weight_denom) + wp_offset )); + } + } +} + + + +/*! 
+************************************************************************ +* \brief +* block biprediction +************************************************************************ +*/ +static inline void bi_prediction(h264_imgpel_macroblock_row_t *mb_pred, + //int joff, + const h264_imgpel_macroblock_t block_l0, + //const h264_imgpel_macroblock_t block_l1, + int ver_block_size, + int hor_block_size + //int ioff + ) +{ + +#ifdef H264_IPP + ippiInterpolateBlock_H264_8u_P2P1R(block_l0[0], mb_pred[0], mb_pred[0], hor_block_size, ver_block_size, sizeof(mb_pred[0])); +#else + int ii, jj; + + for(jj = 0;jj < ver_block_size;jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii = 0; ii < hor_block_size;ii++) + row[ii] = (imgpel) rshift_rnd_sf((b0[ii] + b1[ii]), 1); + } +#endif +} + +static void bi_prediction4x4_mmx(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0) +{ + int jj; + __m64 b0, b1; + __m64 mmx_zero = _mm_setzero_si64(); + __m64 mmx_one = _mm_set1_pi16(1); + + for(jj = 0;jj < 4;jj++) + { + b0 = _mm_cvtsi32_si64(*(int *)(&block_l0[jj])); + b0 = _mm_unpacklo_pi8(b0, mmx_zero); + b1 = _mm_cvtsi32_si64(*(int *)(& mb_pred[jj])); + b1 = _mm_unpacklo_pi8(b1, mmx_zero); + b0 = _mm_add_pi16(b0, b1); + b0 = _mm_add_pi16(b0, mmx_one); + b0 = _mm_srai_pi16(b0, 1); + b0 = _mm_packs_pu16(b0, b0); + *(int *)(&mb_pred[jj]) = _mm_cvtsi64_si32(b0); + } +} + +void bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0) +{ + int jj; + __m128i b0, b1; + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_one = _mm_set1_epi16(1); + + for(jj = 0;jj < 8;jj++) + { + LOAD_LINE_EPI16(b0, (__m128i *)block_l0[jj]); + LOAD_LINE_EPI16(b1, (__m128i *)mb_pred[jj]); + b0 = _mm_add_epi16(b0, b1); + b0 = _mm_add_epi16(b0, xmm_one); + b0 = _mm_srai_epi16(b0, 1); + b0 = _mm_packus_epi16(b0, b0); + _mm_storel_epi64((__m128i *)mb_pred[jj], b0); + } +} + +#ifdef H264_IPP 
+void bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0) +{ + ippiInterpolateBlock_H264_8u_P2P1R(block_l0[0], mb_pred[0], mb_pred[0], 8, 8, sizeof(mb_pred[0])); +} +#endif +/*! +************************************************************************ +* \brief +* block weighted biprediction +************************************************************************ +*/ +static inline void weighted_bi_prediction(h264_imgpel_macroblock_row_t *mb_pred, + const h264_imgpel_macroblock_t block_l0, + int ver_block_size, int hor_block_size, + int wp_scale_l0, int wp_scale_l1, + int wp_offset, int weight_denom) +{ +#ifdef H264_IPP + IppiSize roi = {hor_block_size, ver_block_size}; + ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi); +#else + int ii, jj; + + if (weight_denom > 0) + { + for(jj = 0; jj < ver_block_size; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<hor_block_size;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + weight_denom = -weight_denom; + for(jj = 0; jj < ver_block_size; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<hor_block_size;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +#endif +} + +void weighted_bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + int jj; + + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_scale_l0 = _mm_set1_epi16(wp_scale_l0); + __m128i xmm_scale_l1 = _mm_set1_epi16(wp_scale_l1); + __m128i xmm_offset = _mm_set1_epi16(wp_offset); + if 
(weight_denom > 0) + { + __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom); + __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); + + for(jj = 0; jj < 8; jj++) + { + __m128i b0, b1; + LOAD_LINE_EPI16(b0, (__m128i *)block_l0[jj]); + LOAD_LINE_EPI16(b1, (__m128i *)mb_pred[jj]); + + b0 = _mm_mullo_epi16(b0, xmm_scale_l0); + b1 = _mm_mullo_epi16(b1, xmm_scale_l1); + b0 = _mm_add_epi16(b0, b1); + b0 = _mm_add_epi16(b0, xmm_add); + b0 = _mm_sra_epi16(b0, xmm_shift); + b0 = _mm_add_epi16(b0, xmm_offset); + + b0 = _mm_packus_epi16(b0, b0); // convert back to epi8 + _mm_storel_epi64((__m128i *)mb_pred[jj], b0); + // (x + (1 << (a-1) )) >> a; + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + + __m128i xmm_shift = _mm_cvtsi32_si128(-weight_denom); + for(jj = 0; jj < 8; jj++) + { + __m128i b0, b1; + LOAD_LINE_EPI16(b0, (__m128i *)block_l0[jj]); + LOAD_LINE_EPI16(b1, (__m128i *)mb_pred[jj]); + + b0 = _mm_mullo_epi16(b0, xmm_scale_l0); + b1 = _mm_mullo_epi16(b1, xmm_scale_l1); + b0 = _mm_add_epi16(b0, b1); + b0 = _mm_sll_epi16(b0, xmm_shift); + b0 = _mm_add_epi16(b0, xmm_offset); + + b0 = _mm_packus_epi16(b0, b0); // convert back to epi8 + _mm_storel_epi64((__m128i *)mb_pred[jj], b0); + //(x << a); + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +} + +#ifdef H264_IPP +void weighted_bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + IppiSize roi = {8, 8}; + ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi); +} +#endif + +void weighted_bi_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int 
wp_offset, int weight_denom) +{ + int ii, jj; + + if (weight_denom > 0) + { + for(jj = 0; jj < 8; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<8;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + weight_denom = -weight_denom; + for(jj = 0; jj < 8; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<8;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +} + +#if defined(_DEBUG) || !defined(_M_IX86) +static inline void weighted_bi_prediction4x4(h264_imgpel_macroblock_row_t *mb_pred, + const h264_imgpel_macroblock_t block_l0, + uint16_t wp_scale_l0, + uint16_t wp_scale_l1, + uint16_t wp_offset, + int weight_denom) +{ +#ifdef H264_IPP + IppiSize roi = {4, 4}; + ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi); +#else + int ii, jj; + + if (weight_denom > 0) + { + for(jj = 0; jj < 4; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<4;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + weight_denom = -weight_denom; + for(jj = 0; jj < 4; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<4;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +#endif +} +#else +extern void weighted_bi_prediction4x4(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); +#endif + 
+void weighted_bi_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + int jj; + + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_scale_l0 = _mm_set1_epi16(wp_scale_l0); + __m128i xmm_scale_l1 = _mm_set1_epi16(wp_scale_l1); + __m128i xmm_offset = _mm_set1_epi16(wp_offset); + if (weight_denom > 0) + { + __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom); + __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); + + for(jj = 0; jj < 16; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]); + __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero); + __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0); + b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0); + b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1); + b0_low = _mm_add_epi16(b0_low, b1_low); + b0_high = _mm_add_epi16(b0_high, b1_high); + b0_low = _mm_add_epi16(b0_low, xmm_add); + b0_high = _mm_add_epi16(b0_high, xmm_add); + b0_low = _mm_sra_epi16(b0_low, xmm_shift); + b0_high = _mm_sra_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + // (x + (1 << (a-1) )) >> a; + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + + __m128i xmm_shift = _mm_cvtsi32_si128(-weight_denom); + for(jj = 0; jj < 16; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]); + __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = 
_mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero); + __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0); + b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0); + b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1); + b0_low = _mm_add_epi16(b0_low, b1_low); + b0_high = _mm_add_epi16(b0_high, b1_high); + b0_low = _mm_sll_epi16(b0_low, xmm_shift); + b0_high = _mm_sll_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + + //(x << a); + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +} + +#ifdef H264_IPP +void weighted_bi_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + IppiSize roi = {16, 16}; + ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi); +} +#endif + +void weighted_bi_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + int ii, jj; + + if (weight_denom > 0) + { + for(jj = 0; jj < 16; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + weight_denom = -weight_denom; + for(jj = 0; jj < 16; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const 
imgpel *b1 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +} + +/* 16x8 */ +void weighted_bi_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + int jj; + + __m128i xmm_zero = _mm_setzero_si128(); + __m128i xmm_scale_l0 = _mm_set1_epi16(wp_scale_l0); + __m128i xmm_scale_l1 = _mm_set1_epi16(wp_scale_l1); + __m128i xmm_offset = _mm_set1_epi16(wp_offset); + if (weight_denom > 0) + { + __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom); + __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); + + for(jj = 0; jj < 8; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]); + __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero); + __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0); + b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0); + b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1); + b0_low = _mm_add_epi16(b0_low, b1_low); + b0_high = _mm_add_epi16(b0_high, b1_high); + b0_low = _mm_add_epi16(b0_low, xmm_add); + b0_high = _mm_add_epi16(b0_high, xmm_add); + b0_low = _mm_sra_epi16(b0_low, xmm_shift); + b0_high = _mm_sra_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + // (x + (1 << (a-1) )) >> a; + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + + __m128i xmm_shift = 
_mm_cvtsi32_si128(-weight_denom); + for(jj = 0; jj < 8; jj++) + { + __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]); + __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]); + + __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); + __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero); + __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero); + __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero); + b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0); + b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1); + b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0); + b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1); + b0_low = _mm_add_epi16(b0_low, b1_low); + b0_high = _mm_add_epi16(b0_high, b1_high); + b0_low = _mm_sll_epi16(b0_low, xmm_shift); + b0_high = _mm_sll_epi16(b0_high, xmm_shift); + b0_low = _mm_add_epi16(b0_low, xmm_offset); + b0_high = _mm_add_epi16(b0_high, xmm_offset); + + b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 + _mm_store_si128((__m128i *)mb_pred[jj], b0_low); + + //(x << a); + // row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +} + +#ifdef H264_IPP +void weighted_bi_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + IppiSize roi = {16, 8}; + ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi); +} +#endif + +void weighted_bi_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom) +{ + int ii, jj; + + if (weight_denom > 0) + { + for(jj = 0; jj < 8; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * 
b1[ii]), weight_denom) + wp_offset)); + } + } + else + { + weight_denom = -weight_denom; + for(jj = 0; jj < 8; jj++) + { + const imgpel *b0 = block_l0[jj]; + imgpel *row = mb_pred[jj]; + const imgpel *b1 = row; + + for(ii=0;ii<16;ii++) + row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset)); + } + } +} + +/*! +************************************************************************ +* \brief +* No reference picture mc +************************************************************************ +*/ +static void get_data_no_ref(h264_imgpel_macroblock_row_t *block, int ver_block_size, int hor_block_size, imgpel med_imgpel_value) +{ + int i, j; +#ifdef _DEBUG + printf("list[ref_frame] is equal to 'no reference picture' before RAP\n"); +#endif + + /* fill the block with sample value middle value */ + for (j = 0; j < ver_block_size; j++) + for (i = 0; i < hor_block_size; i++) + block[j][i] = med_imgpel_value; +} + +/*! +************************************************************************ +* \brief +* Interpolation of 1/4 subpixel +************************************************************************ +*/ +void get_block_luma(Macroblock *currMB, ColorPlane pl, StorablePicture *curr_ref, int x_pos, int y_pos, const short *motion_vector, int hor_block_size, int ver_block_size, h264_imgpel_macroblock_row_t *block) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + if (curr_ref == p_Vid->no_reference_picture && p_Vid->framepoc < p_Vid->recovery_poc) + { + get_data_no_ref(block, ver_block_size, hor_block_size, (imgpel) p_Vid->dc_pred_value_comp[pl]); + } + else + { + IppVCInterpolateBlock_8u block_data; + StorablePicture *dec_picture = p_Vid->dec_picture; + VideoImage *cur_imgY = curr_ref->imgY; + + if (IS_INDEPENDENT(p_Vid)) + { + switch(p_Vid->colour_plane_id ) + { + case 1: + cur_imgY = curr_ref->imgUV[0]; + break; + case 2: + cur_imgY = curr_ref->imgUV[1]; + break; + } + } + else if (pl!=PLANE_Y) + { + 
cur_imgY = curr_ref->imgUV[pl-1]; + } + + block_data.pSrc[0] = cur_imgY->base_address; + block_data.srcStep = cur_imgY->stride; + block_data.pDst[0] = block[0]; + block_data.dstStep = sizeof(block[0]); + block_data.sizeFrame.width = dec_picture->size_x; + block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y; + block_data.sizeBlock.width = hor_block_size; + block_data.sizeBlock.height = ver_block_size; + block_data.pointBlockPos.x = x_pos << 2; + block_data.pointBlockPos.y = y_pos << 2; + block_data.pointVector.x = motion_vector[0]; + block_data.pointVector.y = motion_vector[1]; + ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); + + } +} + +/*! +************************************************************************ +* \brief +* Chroma (0,0) +************************************************************************ +*/ +static void get_chroma_00(h264_imgpel_macroblock_t block, const VideoImage *image, int ver_block_size, int hor_block_size, int x_pos, int y_pos) +{ + ptrdiff_t src_stride = image->stride; // in case the compiler doesn't optimize this + imgpel *src = image->base_address + src_stride * y_pos + x_pos; + + int j, i; + switch(hor_block_size) // basically just unrolling this + { + case 16: + for (j = 0; j < ver_block_size; j++) + { + imgpel *row = block[j]; + for (i = 0; i < 16; i++) + { + row[i] = src[i]; + } + src+=src_stride; + } + break; + case 8: + for (j = 0; j < ver_block_size; j++) + { + imgpel *row = block[j]; + for (i = 0; i < 8; i++) + { + row[i] = src[i]; + } + src+=src_stride; + } + break; + case 4: + for (j = 0; j < ver_block_size; j++) + { + imgpel *row = block[j]; + for (i = 0; i < 4; i++) + { + row[i] = src[i]; + } + src+=src_stride; + } + break; + case 2: + for (j = 0; j < ver_block_size; j++) + { + imgpel *row = block[j]; + for (i = 0; i < 2; i++) + { + row[i] = src[i]; + } + src+=src_stride; + } + break; + default: //degenerate case + for (j = 0; j < 
ver_block_size; j++) + { + imgpel *row = block[j]; + for (i = 0; i < hor_block_size; i++) + { + row[i] = src[i]; + } + src+=src_stride; + } + break; + } +} + +static void get_block_chroma(Macroblock *currMB, StorablePicture *curr_ref, int x_pos, int y_pos, const short *motion_vector, int hor_block_size, int ver_block_size, h264_imgpel_macroblock_row_t *block0, h264_imgpel_macroblock_row_t *block1, int ioff, int joff) +{ + VideoParameters *p_Vid = currMB->p_Vid; + if (curr_ref == p_Vid->no_reference_picture && p_Vid->framepoc < p_Vid->recovery_poc) + { + get_data_no_ref(block0, ver_block_size, hor_block_size, (imgpel) p_Vid->dc_pred_value_comp[1]); + get_data_no_ref(block1, ver_block_size, hor_block_size, (imgpel) p_Vid->dc_pred_value_comp[2]); + } + else + { + StorablePicture *dec_picture = p_Vid->dec_picture; + IppVCInterpolateBlock_8u block_data; + + block_data.pSrc[0] = curr_ref->imgUV[0]->base_address; + block_data.pSrc[1] = curr_ref->imgUV[1]->base_address; + block_data.srcStep = curr_ref->imgUV[0]->stride; + block_data.pDst[0] = &block0[joff][ioff]; + block_data.pDst[1] = &block1[joff][ioff]; + block_data.dstStep = sizeof(block0[0]); + block_data.sizeFrame.width = dec_picture->size_x_cr; + block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? 
(dec_picture->size_y_cr >> 1): dec_picture->size_y_cr; + block_data.sizeBlock.width = hor_block_size; + block_data.sizeBlock.height = ver_block_size; + if (dec_picture->chroma_format_idc == YUV444) + { + block_data.pointBlockPos.x = x_pos; + block_data.pointVector.x = motion_vector[0] << 1; + } + else + { + block_data.pointBlockPos.x = x_pos<<1; + block_data.pointVector.x = motion_vector[0]; + } + if (dec_picture->chroma_format_idc == YUV420) + { + block_data.pointVector.y = motion_vector[1]; + block_data.pointBlockPos.y = y_pos<<1; + } + else + { + block_data.pointBlockPos.y = y_pos; + block_data.pointVector.y = motion_vector[1] << 1; + } + + ippiInterpolateChromaBlock_H264_8u_P2R(&block_data); + + + } +} + + +void intra_cr_decoding(Macroblock *currMB, int yuv) +{ + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + StorablePicture *dec_picture = p_Vid->dec_picture; + int uv; + int b8,b4; + int ioff, joff; + // TODO: fix 4x4 lossless + + for(uv = 0; uv < 2; uv++) + { + int pl = uv + 1; + const h264_short_block_t *blocks = currSlice->cof4[pl]; + const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl]; + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + + intrapred_chroma(currMB, uv); + + if ((!(currMB->mb_type == SI4MB) && (currMB->cbp >> 4)) ) + { + if (yuv == YUV420-1) + { + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + copy_image_data_8x8_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, mb_rec); + } + else + { + for (b8 = 0; b8 < (p_Vid->num_uv_blocks); b8++) + { + for(b4 = 0; b4 < 4; b4++) + { + joff = subblk_offset_y[yuv][b8][b4]; + ioff = subblk_offset_x[yuv][b8][b4]; + + opt_itrans4x4(blocks[cof4_pos_to_subblock[joff>>2][ioff>>2]], mb_pred, mb_rec, ioff, joff); + + copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 
ioff, currMB->pix_c_y + joff, mb_rec, ioff, joff); + } + } + } + } + else if (currMB->mb_type == SI4MB) + { + itrans_sp_cr(currMB, uv); + + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 0, currMB->pix_c_y + 0, mb_rec, 0, 0); + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 4, currMB->pix_c_y + 0, mb_rec, 4, 0); + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 0, currMB->pix_c_y + 4, mb_rec, 0, 4); + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 4, currMB->pix_c_y + 4, mb_rec, 4, 4); + } + else + { + if (yuv == YUV420-1) + { + copy_image_data_8x8_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, mb_pred); + } + else + { + for (b8 = 0; b8 < (p_Vid->num_uv_blocks); b8++) + { + for(b4 = 0; b4 < 4; b4++) + { + joff = subblk_offset_y[yuv][b8][b4]; + ioff = subblk_offset_x[yuv][b8][b4]; + + copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + ioff, currMB->pix_c_y + joff, mb_pred, ioff, joff); + } + } + } + } + } +} + +void prepare_direct_params(Macroblock *currMB, StorablePicture *dec_picture, short pmvl0[2], short pmvl1[2],char *l0_rFrame, char *l1_rFrame) +{ + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + char l0_rFrameL, l0_rFrameU, l0_rFrameUR; + char l1_rFrameL, l1_rFrameU, l1_rFrameUR; + PicMotionParams *motion = &dec_picture->motion; + + PixelPos mb[4]; + + get_neighbors0016(currMB, mb); + + if (!currSlice->mb_aff_frame_flag) + { + l0_rFrameL = (char) (mb[0].available ? motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx : -1); + l0_rFrameU = (char) (mb[1].available ? motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx : -1); + l0_rFrameUR = (char) (mb[2].available ? 
motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx : -1); + + l1_rFrameL = (char) (mb[0].available ? motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx : -1); + l1_rFrameU = (char) (mb[1].available ? motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx : -1); + l1_rFrameUR = (char) (mb[2].available ? motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx : -1); + } + else + { + if (currMB->mb_field) + { + l0_rFrameL = (char) (mb[0].available + ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx < 0 + ? motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx + : motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx * 2: -1); + + l0_rFrameU = (char) (mb[1].available + ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx < 0 + ? motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx + : motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx * 2: -1); + + l0_rFrameUR = (char) (mb[2].available + ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx < 0 + ? motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx + : motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx * 2: -1); + + l1_rFrameL = (char) (mb[0].available + ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx < 0 + ? motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx + : motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx * 2: -1); + + l1_rFrameU = (char) (mb[1].available + ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx < 0 + ? motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx + : motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx * 2: -1); + + l1_rFrameUR = (char) (mb[2].available + ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx < 0 + ? 
motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx + : motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx * 2: -1); + } + else + { + l0_rFrameL = (char) (mb[0].available + ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx < 0 + ? motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx >> 1 + : motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx: -1); + + l0_rFrameU = (char) (mb[1].available + ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx < 0 + ? motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx >> 1 + : motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx : -1); + + l0_rFrameUR = (char) (mb[2].available + ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx < 0 + ? motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx >> 1 + : motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx : -1); + + l1_rFrameL = (char) (mb[0].available + ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx < 0 + ? motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx >> 1 + : motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx : -1); + + l1_rFrameU = (char) (mb[1].available + ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx < 0 + ? motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx >> 1 + : motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx : -1); + + l1_rFrameUR = (char) (mb[2].available + ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx < 0 + ? motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx >> 1 + : motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx : -1); + } + } + + *l0_rFrame = (char) ((l0_rFrameL >= 0 && l0_rFrameU >= 0) ? 
imin(l0_rFrameL,l0_rFrameU) : imax(l0_rFrameL,l0_rFrameU)); + *l0_rFrame = (char) ((*l0_rFrame >= 0 && l0_rFrameUR >= 0) ? imin(*l0_rFrame,l0_rFrameUR): imax(*l0_rFrame,l0_rFrameUR)); + + *l1_rFrame = (char) ((l1_rFrameL >= 0 && l1_rFrameU >= 0) ? imin(l1_rFrameL,l1_rFrameU) : imax(l1_rFrameL,l1_rFrameU)); + *l1_rFrame = (char) ((*l1_rFrame >= 0 && l1_rFrameUR >= 0) ? imin(*l1_rFrame,l1_rFrameUR): imax(*l1_rFrame,l1_rFrameUR)); + + if (*l0_rFrame >=0) + currMB->GetMVPredictor (currMB, mb, pmvl0, *l0_rFrame, motion->motion[LIST_0], 0, 0, 16, 16); + + if (*l1_rFrame >=0) + currMB->GetMVPredictor (currMB, mb, pmvl1, *l1_rFrame, motion->motion[LIST_1], 0, 0, 16, 16); +} + +static void check_motion_vector_range(VideoParameters *p_Vid, short mv_x, short mv_y) +{ +#ifdef _DEBUG + if (mv_x > 8191 || mv_x < -8192) + { + fprintf(stderr,"WARNING! Horizontal motion vector %d is out of allowed range {-8192, 8191} in picture %d, macroblock %d\n", mv_x, p_Vid->number, p_Vid->current_mb_nr); + //error("invalid stream: too big horizontal motion vector", 500); + } + + if (mv_y > (p_Vid->max_mb_vmv_r - 1) || mv_y < (-p_Vid->max_mb_vmv_r)) + { + fprintf(stderr,"WARNING! 
Vertical motion vector %d is out of allowed range {%d, %d} in picture %d, macroblock %d\n", mv_y, (-p_Vid->max_mb_vmv_r), (p_Vid->max_mb_vmv_r - 1), p_Vid->number, p_Vid->current_mb_nr); + //error("invalid stream: too big vertical motion vector", 500); + } +#endif +} + +void perform_mc(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int block_size_x, int block_size_y, int curr_mb_field) +{ + VideoParameters *p_Vid = currMB->p_Vid; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + Slice *currSlice = currMB->p_Slice; + + static const int mv_mul = 16; // 4 * 4 + + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int ioff = (i << 2); + int joff = (j << 2); + + assert (pred_dir<=2); + + if (pred_dir != 2) + { + //===== Single List Prediction ===== + short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx; + short ref_idx_wp = ref_idx; + short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv; + StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx]; + + check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]); + + + get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]); + + if (currSlice->apply_weights) + { + int alpha_l0, wp_offset; + if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE)))) + { + ref_idx_wp >>=1; + } + + alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0]; + wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0]; + + weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], block_size_y, block_size_x, alpha_l0, wp_offset, currSlice->luma_log2_weight_denom); + } + + if ((dec_picture->chroma_format_idc != YUV400) && 
(dec_picture->chroma_format_idc != YUV444) ) + { // YUV420 or YUV422 + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1; + int block_size_x_cr = block_size_x >> 1; + int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1; + + short mv_cr[2] = {mv_array[0], mv_array[1] + + ((active_sps->chroma_format_idc == YUV420)? list->chroma_vector_adjustment : 0) }; + get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + for(uv=0;uv<2;uv++) + { + if (currSlice->apply_weights) + { + int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1]; + int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1]; + + weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, block_size_x_cr, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom); + } + } + } + } + else + { + //===== BI-PREDICTION ===== + __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; + short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv; + short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv; + + short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx; + short l0_ref_idx = l0_refframe; + short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx; + short l1_ref_idx = l1_refframe; + + check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]); + check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]); + + get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, l0_mv_array, block_size_x, block_size_y, tmp_block_l0[0]); + get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, l1_mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t 
*)&currSlice->mb_pred[pl][joff][ioff]); + + if(currSlice->apply_weights) + { + int alpha_l0, alpha_l1, wp_offset; + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks. + // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0; + if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field) + { + l0_ref_idx >>=1; + l1_ref_idx >>=1; + } + + alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1); + + weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x, alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1)); + } + else + { + bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x); + } + + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) ) + { // YUV420 or YUV422 + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? joff : joff >> 1; + int block_size_x_cr = block_size_x >> 1; + int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1; + + int vec1_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0); + int vec2_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? 
p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0); + short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) }; + short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) }; + + get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, mv_cr1, block_size_x_cr, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0); + get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, mv_cr2, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + for(uv=0;uv<2;uv++) + { + if(currSlice->apply_weights) + { + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1); + + weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1)); + } + else + { + bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr); + } + } + } + } +} + + + +void perform_mc8x16(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field) +{ + VideoParameters *p_Vid = currMB->p_Vid; + seq_parameter_set_rbsp_t *active_sps = 
p_Vid->active_sps; + const int block_size_x=8; + const int block_size_y=16; + Slice *currSlice = currMB->p_Slice; + + static const int mv_mul = 16; // 4 * 4 + + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int ioff = (i << 2); + int joff = (j << 2); + + assert (pred_dir<=2); + + if (pred_dir != 2) + { + //===== Single List Prediction ===== + short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx; + short ref_idx_wp = ref_idx; + short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv; + StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx]; + + check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]); + + get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]); + + if (currSlice->apply_weights) + { + int alpha_l0, wp_offset; + if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE)))) + { + ref_idx_wp >>=1; + } + + alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0]; + wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0]; + + weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], block_size_y, block_size_x, alpha_l0, wp_offset, currSlice->luma_log2_weight_denom); + } + + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) ) + { // YUV420 or YUV422 + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1; + int block_size_x_cr = block_size_x >> 1; + int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1; + + short mv_cr[2] = {mv_array[0], mv_array[1] + + ((active_sps->chroma_format_idc == YUV420)? 
list->chroma_vector_adjustment : 0) }; + get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + for(uv=0;uv<2;uv++) + { + if (currSlice->apply_weights) + { + int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1]; + int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1]; + + weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, block_size_x_cr, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom); + } + } + } + } + else + { + //===== BI-PREDICTION ===== + __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; + short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv; + short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv; + + short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx; + short l0_ref_idx = l0_refframe; + short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx; + short l1_ref_idx = l1_refframe; + + check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]); + check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]); + + get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, l0_mv_array, block_size_x, block_size_y, tmp_block_l0[0]); + get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, l1_mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]); + + if(currSlice->apply_weights) + { + int alpha_l0, alpha_l1, wp_offset; + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks. 
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0; + if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field) + { + l0_ref_idx >>=1; + l1_ref_idx >>=1; + } + + alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1); + + weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x, alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1)); + } + else + { + bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x); + } + + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) ) + { // YUV420 or YUV422 + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? joff : joff >> 1; + int block_size_x_cr = block_size_x >> 1; + int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1; + + int vec1_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0); + int vec2_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0); + short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) }; + short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? 
p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) }; + + get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, mv_cr1, block_size_x_cr, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0); + get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, mv_cr2, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + for(uv=0;uv<2;uv++) + { + if(currSlice->apply_weights) + { + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1); + + weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1)); + } + else + { + bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr); + } + } + } + } +} + +void perform_mc16x8(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field) +{ + VideoParameters *p_Vid = currMB->p_Vid; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + Slice *currSlice = currMB->p_Slice; + + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int ioff = (i << 2); + int joff = (j << 2); + + assert (pred_dir<=2); + + if (pred_dir != 2) + { + //===== Single List Prediction ===== + short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx; + short ref_idx_wp = 
ref_idx; + short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv; + StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx]; + + check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]); + + get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, 16, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]); + + if (currSlice->apply_weights) + { + int alpha_l0, wp_offset; + if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE)))) + { + ref_idx_wp >>=1; + } + + alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0]; + wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0]; + + opt_weighted_mc_prediction16x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom); + } + + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) ) + { // YUV420 or YUV422 + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1; + int block_size_x_cr = 16 >> 1; + int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? 8 : 8 >> 1; + + short mv_cr[2] = {mv_array[0], mv_array[1] + + ((active_sps->chroma_format_idc == YUV420)? 
list->chroma_vector_adjustment : 0) }; + get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + for(uv=0;uv<2;uv++) + { + if (currSlice->apply_weights) + { + int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1]; + int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1]; + + weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, block_size_x_cr, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom); + } + } + } + } + else + { + //===== BI-PREDICTION ===== + __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; + short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv; + short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv; + + short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx; + short l0_ref_idx = l0_refframe; + short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx; + short l1_ref_idx = l1_refframe; + + check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]); + check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]); + + get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, l0_mv_array, 16, 8, tmp_block_l0[0]); + get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, l1_mv_array, 16, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]); + + if(currSlice->apply_weights) + { + int alpha_l0, alpha_l1, wp_offset; + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks. 
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0; + if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field) + { + l0_ref_idx >>=1; + l1_ref_idx >>=1; + } + + alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1); + + opt_weighted_bi_prediction16x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1)); + } + else + { + bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], 8, 16); + } + + if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) ) + { // YUV420 or YUV422 + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? joff : joff >> 1; + int block_size_x_cr = 16 >> 1; + int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? 8 : 8 >> 1; + + int vec1_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0); + int vec2_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0); + short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) }; + short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? 
p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) }; + + get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, mv_cr1, block_size_x_cr, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0); + get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, mv_cr2, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + for(uv=0;uv<2;uv++) + { + if(currSlice->apply_weights) + { + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1); + + weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1)); + } + else + { + bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr); + } + } + } + } +} + + +static void __forceinline perform_mc8x8_YUV420(Macroblock *currMB, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field) +{ + VideoParameters *p_Vid = currMB->p_Vid; + + Slice *currSlice = currMB->p_Slice; + + int i4 = currMB->block_x + i; + int j4 = currMB->block_y + j; + int ioff = (i << 2); + int joff = (j << 2); + + assert (pred_dir<=2); + + if (pred_dir != 2) + { + //===== Single List Prediction ===== + short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx; + short ref_idx_wp = ref_idx; + short *mv_array = 
dec_picture->motion.motion[pred_dir][j4][i4].mv; + StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx]; + + check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]); + + get_block_luma(currMB, PLANE_Y, list, i4, currMB->block_y_aff + j, mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff]); + + if (currSlice->apply_weights) + { + int alpha_l0, wp_offset; + if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE)))) + { + ref_idx_wp >>=1; + } + + alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0]; + wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0]; + + opt_weighted_mc_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom); + } + + { + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = joff >> 1; + + short mv_cr[2] = {mv_array[0], mv_array[1] + list->chroma_vector_adjustment }; + get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, 4, 4, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + + for(uv=0;uv<2;uv++) + { + + if (currSlice->apply_weights) + { + int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1]; + int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1]; + + weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], 4, 4, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom); + } + } + } + } + else + { + //===== BI-PREDICTION ===== + __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; + short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv; + short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv; + + short l0_ref_idx = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx; + short l1_ref_idx = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx; + + 
StorablePicture *ref_image0 = p_Vid->listX[LIST_0 + list_offset][l0_ref_idx]; + StorablePicture *ref_image1 = p_Vid->listX[LIST_1 + list_offset][l1_ref_idx]; + + check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]); + check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]); + + if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid)) + { + get_block_luma(currMB, PLANE_Y, ref_image0, i4, currMB->block_y_aff + j, l0_mv_array, 8, 8, tmp_block_l0[0]); + get_block_luma(currMB, PLANE_Y, ref_image1, i4, currMB->block_y_aff + j, l1_mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff]); + } + else + { + IppVCInterpolateBlock_8u block_data; + + block_data.pSrc[0] = ref_image0->imgY->base_address; + block_data.srcStep = ref_image0->imgY->stride; + block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]); + block_data.dstStep = sizeof(tmp_block_l0[0][0]); + block_data.sizeFrame.width = dec_picture->size_x; + block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y; + block_data.sizeBlock.width = 8; + block_data.sizeBlock.height = 8; + block_data.pointBlockPos.x = i4 << 2; + block_data.pointBlockPos.y = (currMB->block_y_aff + j) << 2; + block_data.pointVector.x = l0_mv_array[0]; + block_data.pointVector.y = l0_mv_array[1]; + ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); + block_data.pSrc[0] = ref_image1->imgY->base_address; + block_data.srcStep = ref_image1->imgY->stride; + block_data.pDst[0] = &currSlice->mb_pred[0][joff][ioff]; + block_data.pointVector.x = l1_mv_array[0]; + block_data.pointVector.y = l1_mv_array[1]; + ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); + } + + if(currSlice->apply_weights) + { + int alpha_l0, alpha_l1, wp_offset; + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + // This code existed in the original. 
Seems pointless but copying it here for reference and in case temporal direct breaks. + // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0; + if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field) + { + l0_ref_idx >>=1; + l1_ref_idx >>=1; + } + + alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1); + + opt_weighted_bi_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1)); + } + else + { + bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff], tmp_block_l0[0], 8, 8); + } + + { + int uv; + + int ioff_cr = ioff >> 1; + int joff_cr = joff >> 1; + + short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ref_image0->chroma_vector_adjustment}; + short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ref_image1->chroma_vector_adjustment}; + + if (p_Vid->framepoc < p_Vid->recovery_poc) + { + get_block_chroma(currMB, ref_image0, i4, currMB->block_y_aff + j, mv_cr1, 4, 4, tmp_block_l0[0], tmp_block_l0[1], 0, 0); + get_block_chroma(currMB, ref_image1, i4, currMB->block_y_aff + j, mv_cr2, 4, 4, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr); + } + else + { + IppVCInterpolateBlock_8u block_data; + block_data.pSrc[0] = ref_image0->imgUV[0]->base_address; + block_data.pSrc[1] = ref_image0->imgUV[1]->base_address; + block_data.srcStep = ref_image0->imgUV[0]->stride; + block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]); + block_data.pDst[1] = (Ipp8u *)(tmp_block_l0[1]); + block_data.dstStep = 
sizeof(tmp_block_l0[0][0]); + block_data.sizeFrame.width = dec_picture->size_x_cr; + block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y_cr >> 1): dec_picture->size_y_cr; + block_data.sizeBlock.width = 4; + block_data.sizeBlock.height = 4; + block_data.pointBlockPos.x = i4<<1; + block_data.pointVector.x = mv_cr1[0]; + block_data.pointVector.y = mv_cr1[1]; + block_data.pointBlockPos.y = (currMB->block_y_aff + j)<<1; + ippiInterpolateChromaBlock_H264_8u_P2R(&block_data); + block_data.pSrc[0] = ref_image1->imgUV[0]->base_address; + block_data.pSrc[1] = ref_image1->imgUV[1]->base_address; + block_data.srcStep = ref_image1->imgUV[0]->stride; + block_data.pDst[0] = &currSlice->mb_pred[1][joff_cr][ioff_cr]; + block_data.pDst[1] = &currSlice->mb_pred[2][joff_cr][ioff_cr]; + block_data.pointVector.x = mv_cr2[0]; + block_data.pointVector.y = mv_cr2[1]; + ippiInterpolateChromaBlock_H264_8u_P2R(&block_data); + } + + for(uv=0;uv<2;uv++) + { + if(currSlice->apply_weights) + { + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? 
list_offset : 0; + + int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1); + + weighted_bi_prediction4x4((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1)); + } + else + { + bi_prediction4x4_mmx((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv]); + } + } + } + } +} +
// Motion-compensated inter prediction for an 8x8 partition.
//
// When dec_picture->chroma_format_idc is YUV420 the work is handed to perform_mc8x8_YUV420
// (which does not receive pl). Every other chroma format is handled in this function.
//
//   currMB         macroblock being predicted (supplies p_Vid, p_Slice, block_x, block_y, block_y_aff)
//   pl             plane index used for the luma-style fetch and for the mb_pred destination
//   dec_picture    picture whose motion.motion[list][j4][i4] entry supplies ref_idx and mv
//   pred_dir       2 selects bi-prediction from LIST_0 and LIST_1; any other value is used
//                  directly as the list index for single-list prediction
//   i, j           partition position inside the macroblock; multiplied by 4 to address mb_pred
//   list_offset    added to the list number when indexing p_Vid->listX
//   curr_mb_field  when non-zero (and the weighting-mode test holds) the reference index used
//                  to address the weight tables is halved
//
// The prediction is written into currSlice->mb_pred; nothing is returned.
void perform_mc8x8(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field)
{
  if (dec_picture->chroma_format_idc == YUV420)
  {
    perform_mc8x8_YUV420(currMB, dec_picture, pred_dir, i, j, list_offset, curr_mb_field);
  }
  else
  {
    VideoParameters *p_Vid = currMB->p_Vid;
    Slice *currSlice = currMB->p_Slice;

    int i4 = currMB->block_x + i;
    int j4 = currMB->block_y + j;
    int ioff = (i << 2);
    int joff = (j << 2);

    assert (pred_dir<=2);

    if (pred_dir != 2)
    {
      //===== Single List Prediction =====
      short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
      short ref_idx_wp = ref_idx;
      short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
      StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];

      check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);

      get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);

      if (currSlice->apply_weights)
      {
        int alpha_l0, wp_offset;

        // field macroblocks address the weight tables with ref_idx / 2 in these weighting modes
        if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag && (p_Vid->type == P_SLICE || p_Vid->type == SP_SLICE)) ||
                              (p_Vid->active_pps->weighted_bipred_idc == 1 && (p_Vid->type == B_SLICE))))
        {
          ref_idx_wp >>= 1;
        }

        alpha_l0  = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
        wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];

        opt_weighted_mc_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
      }

      if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444))
      { // YUV420 or YUV422
        int uv;

        int ioff_cr = ioff >> 1;
        int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1;
        int block_size_y_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? 8 : 4;

        // the luma vector is passed on unchanged; no chroma_vector_adjustment is applied on this path
        short mv_cr[2] = {mv_array[0], mv_array[1] };

        get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, 4, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);

        if (currSlice->apply_weights)
        {
          for (uv = 0; uv < 2; uv++)
          {
            int alpha_l0  = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
            int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];

            weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, 4, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
          }
        }
      }
    }
    else
    {
      //===== BI-PREDICTION =====
      // LIST_0 is fetched into tmp_block_l0, LIST_1 straight into mb_pred; the two are then combined in mb_pred
      __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2];
      short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
      short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;

      short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
      short l0_ref_idx  = l0_refframe;
      short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
      short l1_ref_idx  = l1_refframe;

      StorablePicture *ref_l0 = p_Vid->listX[LIST_0 + list_offset][l0_refframe];
      StorablePicture *ref_l1 = p_Vid->listX[LIST_1 + list_offset][l1_refframe];

      check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
      check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);

      if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid) || pl!=PLANE_Y)
      {
        get_block_luma(currMB, pl, ref_l0, i4, currMB->block_y_aff + j, l0_mv_array, 8, 8, tmp_block_l0[0]);
        get_block_luma(currMB, pl, ref_l1, i4, currMB->block_y_aff + j, l1_mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
      }
      else
      {
        // otherwise both fetches go through the IPP luma interpolation call, reusing one descriptor:
        // only source, destination and vector are rewritten for the LIST_1 pass
        VideoImage *cur_imgY = ref_l0->imgY;
        IppVCInterpolateBlock_8u block_data;

        block_data.pSrc[0] = cur_imgY->base_address;
        block_data.srcStep = cur_imgY->stride;
        block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]);
        block_data.dstStep = sizeof(tmp_block_l0[0][0]);
        block_data.sizeFrame.width = dec_picture->size_x;
        block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y;
        block_data.sizeBlock.width = 8;
        block_data.sizeBlock.height = 8;
        block_data.pointBlockPos.x = i4 << 2;
        block_data.pointBlockPos.y = (currMB->block_y_aff + j) << 2;
        block_data.pointVector.x = l0_mv_array[0];
        block_data.pointVector.y = l0_mv_array[1];
        ippiInterpolateLumaBlock_H264_8u_P1R(&block_data);

        cur_imgY = ref_l1->imgY;
        block_data.pSrc[0] = cur_imgY->base_address;
        block_data.srcStep = cur_imgY->stride;
        block_data.pDst[0] = &currSlice->mb_pred[pl][joff][ioff];
        block_data.pointVector.x = l1_mv_array[0];
        block_data.pointVector.y = l1_mv_array[1];
        ippiInterpolateLumaBlock_H264_8u_P1R(&block_data);
      }

      if (currSlice->apply_weights)
      {
        int alpha_l0, alpha_l1, wp_offset;
        int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc == 2) ? list_offset : 0;

        // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
        // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
        if (((p_Vid->active_pps->weighted_pred_flag && (p_Vid->type == P_SLICE || p_Vid->type == SP_SLICE)) ||
             (p_Vid->active_pps->weighted_bipred_idc == 1 && (p_Vid->type == B_SLICE))) && curr_mb_field)
        {
          l0_ref_idx >>= 1;
          l1_ref_idx >>= 1;
        }

        alpha_l0  = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
        alpha_l1  = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
        wp_offset = ((currSlice->wp_offset[LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >> 1);

        opt_weighted_bi_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
      }
      else
      {
        bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], 8, 8);
      }

      if (dec_picture->chroma_format_idc == YUV422)
      {
        int uv;
        int ioff_cr = ioff >> 1;
        int joff_cr = joff;

        // the luma vectors are passed on unchanged; no chroma_vector_adjustment is applied on this path
        short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1]};
        short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1]};

        get_block_chroma(currMB, ref_l0, i4, currMB->block_y_aff + j, mv_cr1, 4, 8, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
        get_block_chroma(currMB, ref_l1, i4, currMB->block_y_aff + j, mv_cr2, 4, 8, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);

        if (currSlice->apply_weights)
        {
          int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc == 2) ? list_offset : 0;

          for (uv = 0; uv < 2; uv++)
          {
            int alpha_l0  = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
            int alpha_l1  = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
            int wp_offset = ((currSlice->wp_offset[LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >> 1);

            weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], 8, 4, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
          }
        }
        else
        {
          for (uv = 0; uv < 2; uv++)
          {
            bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], 8, 4);
          }
        }
      }
    }
  }
}
+ + +static void __forceinline perform_mc16x16_YUV420(Macroblock *currMB, StorablePicture *dec_picture, int pred_dir, int list_offset, int curr_mb_field) +{ + VideoParameters *p_Vid = currMB->p_Vid; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + Slice *currSlice = currMB->p_Slice; + + static const int mv_mul = 16; // 4 * 4 + + int i4 = currMB->block_x; + int j4 = currMB->block_y; + + assert (pred_dir<=2); + + if (pred_dir != 2) + { + //===== Single List Prediction ===== + short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx; + short ref_idx_wp = ref_idx; + short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv; + StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx]; + + check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]); + + get_block_luma(currMB, PLANE_Y, list, i4, currMB->block_y_aff, mv_array, 16, 16, currSlice->mb_pred[0]); + + if (currSlice->apply_weights) + { + int alpha_l0, wp_offset; + if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE)))) + { + ref_idx_wp >>=1; + } + + alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0]; + 
wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0]; + + opt_weighted_mc_prediction16x16(currSlice->mb_pred[0], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom); + } + { + int uv; + short mv_cr[2] = {mv_array[0], mv_array[1] + list->chroma_vector_adjustment }; + get_block_chroma(currMB, list, i4, currMB->block_y_aff, mv_cr, 8, 8, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0); + + for(uv=0;uv<2;uv++) + { + if (currSlice->apply_weights) + { + int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1]; + int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1]; + + opt_weighted_mc_prediction8x8(currSlice->mb_pred[uv + 1], alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom); + } + } + } + } + else + { + //===== BI-PREDICTION ===== + __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; + short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv; + short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv; + + short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx; + short l0_ref_idx = l0_refframe; + short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx; + short l1_ref_idx = l1_refframe; + + check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]); + check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]); + + if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid)) + { + get_block_luma(currMB, PLANE_Y, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff, l0_mv_array, 16, 16, tmp_block_l0[0]); + get_block_luma(currMB, PLANE_Y, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff, l1_mv_array, 16, 16, currSlice->mb_pred[0]); + } + else + { + VideoImage *cur_imgY = p_Vid->listX[LIST_0 + list_offset][l0_refframe]->imgY; + IppVCInterpolateBlock_8u block_data; + + block_data.pSrc[0] = cur_imgY->base_address; + block_data.srcStep = cur_imgY->stride; + block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]); + 
block_data.dstStep = sizeof(tmp_block_l0[0][0]); + block_data.sizeFrame.width = dec_picture->size_x; + block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y; + block_data.sizeBlock.width = 16; + block_data.sizeBlock.height = 16; + block_data.pointBlockPos.x = i4 << 2; + block_data.pointBlockPos.y = currMB->block_y_aff<< 2; + block_data.pointVector.x = l0_mv_array[0]; + block_data.pointVector.y = l0_mv_array[1]; + ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); + cur_imgY = p_Vid->listX[LIST_1 + list_offset][l1_refframe]->imgY; + block_data.pSrc[0] = cur_imgY->base_address; + block_data.srcStep = cur_imgY->stride; + block_data.pDst[0] = (Ipp8u *)(currSlice->mb_pred[0]); + block_data.pointVector.x = l1_mv_array[0]; + block_data.pointVector.y = l1_mv_array[1]; + ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); + } + + if(currSlice->apply_weights) + { + int alpha_l0, alpha_l1, wp_offset; + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks. 
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0; + if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field) + { + l0_ref_idx >>=1; + l1_ref_idx >>=1; + } + + alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1); + + opt_weighted_bi_prediction16x16(currSlice->mb_pred[0], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1)); + } + else + { + bi_prediction(currSlice->mb_pred[0], tmp_block_l0[0], 16, 16); + } + + { + int uv; + + short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment }; + short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment }; + + get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff , mv_cr1, 8, 8, tmp_block_l0[0], tmp_block_l0[1], 0, 0); + get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff , mv_cr2, 8, 8, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0); + + for(uv=0;uv<2;uv++) + { + if(currSlice->apply_weights) + { + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? 
list_offset : 0; + + int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1); + + opt_weighted_bi_prediction8x8(currSlice->mb_pred[uv+1], tmp_block_l0[uv], alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1)); + } + else + { + bi_prediction(currSlice->mb_pred[uv+1], tmp_block_l0[uv], 8, 8); + } + } + } + + } +} + + + +void perform_mc16x16(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int list_offset, int curr_mb_field) +{ + if (dec_picture->chroma_format_idc == YUV420) + { + perform_mc16x16_YUV420(currMB, dec_picture, pred_dir, list_offset, curr_mb_field); + } + else + { + VideoParameters *p_Vid = currMB->p_Vid; + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + Slice *currSlice = currMB->p_Slice; + + static const int mv_mul = 16; // 4 * 4 + + int i4 = currMB->block_x; + int j4 = currMB->block_y; + + assert (pred_dir<=2); + + if (pred_dir != 2) + { + //===== Single List Prediction ===== + short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx; + short ref_idx_wp = ref_idx; + short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv; + StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx]; + + check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]); + + get_block_luma(currMB, pl, list, i4, currMB->block_y_aff, mv_array, 16, 16, currSlice->mb_pred[pl]); + + if (currSlice->apply_weights) + { + int alpha_l0, wp_offset; + if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE)))) + { + ref_idx_wp >>=1; + } + + alpha_l0 = 
currSlice->wp_weight[pred_dir][ref_idx_wp][0]; + wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0]; + + opt_weighted_mc_prediction16x16(currSlice->mb_pred[pl], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom); + } + + if (dec_picture->chroma_format_idc == YUV422) + { + int uv; + short mv_cr[2] = {mv_array[0], mv_array[1]}; + get_block_chroma(currMB, list, i4, currMB->block_y_aff, mv_cr, 8, 16, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0); + + for(uv=0;uv<2;uv++) + { + if (currSlice->apply_weights) + { + int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1]; + int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1]; + + weighted_mc_prediction(currSlice->mb_pred[uv + 1], 16, 8, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom); + } + } + } + } + else + { + //===== BI-PREDICTION ===== + __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; + short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv; + short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv; + + short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx; + short l0_ref_idx = l0_refframe; + short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx; + short l1_ref_idx = l1_refframe; + + check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]); + check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]); + + if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid) || pl!=PLANE_Y) + { + get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff, l0_mv_array, 16, 16, tmp_block_l0[0]); + get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff, l1_mv_array, 16, 16, currSlice->mb_pred[pl]); + } + else + { + VideoImage *cur_imgY = p_Vid->listX[LIST_0 + list_offset][l0_refframe]->imgY; + IppVCInterpolateBlock_8u block_data; + + block_data.pSrc[0] = cur_imgY->base_address; + block_data.srcStep = 
cur_imgY->stride; + block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]); + block_data.dstStep = sizeof(tmp_block_l0[0][0]); + block_data.sizeFrame.width = dec_picture->size_x; + block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y; + block_data.sizeBlock.width = 16; + block_data.sizeBlock.height = 16; + block_data.pointBlockPos.x = i4 << 2; + block_data.pointBlockPos.y = currMB->block_y_aff<< 2; + block_data.pointVector.x = l0_mv_array[0]; + block_data.pointVector.y = l0_mv_array[1]; + ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); + cur_imgY = p_Vid->listX[LIST_1 + list_offset][l1_refframe]->imgY; + block_data.pSrc[0] = cur_imgY->base_address; + block_data.srcStep = cur_imgY->stride; + block_data.pDst[0] = (Ipp8u *)(currSlice->mb_pred[pl]); + block_data.pointVector.x = l1_mv_array[0]; + block_data.pointVector.y = l1_mv_array[1]; + ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); + } + + if(currSlice->apply_weights) + { + int alpha_l0, alpha_l1, wp_offset; + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; + + // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks. 
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0; + if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))|| + (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field) + { + l0_ref_idx >>=1; + l1_ref_idx >>=1; + } + + alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0]; + wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1); + + opt_weighted_bi_prediction16x16(currSlice->mb_pred[pl], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1)); + } + else + { + bi_prediction(currSlice->mb_pred[pl], tmp_block_l0[0], 16, 16); + } + + if (dec_picture->chroma_format_idc == YUV422) + { // YUV422 + int uv; + + int block_size_y_cr = p_Vid->mb_cr_size_y; + + short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) }; + short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) }; + + get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff , mv_cr1, 8, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0); + get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff , mv_cr2, 8, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0); + + + for(uv=0;uv<2;uv++) + { + if(currSlice->apply_weights) + { + int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? 
list_offset : 0; + + int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1]; + int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1); + + weighted_bi_prediction(currSlice->mb_pred[uv+1], tmp_block_l0[uv], block_size_y_cr, 8, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1)); + } + else + { + bi_prediction(currSlice->mb_pred[uv+1], tmp_block_l0[uv], block_size_y_cr, 8); + } + } + } + } + } +} + diff --git a/Src/h264dec/ldecod/src/meminput.c b/Src/h264dec/ldecod/src/meminput.c new file mode 100644 index 00000000..54465340 --- /dev/null +++ b/Src/h264dec/ldecod/src/meminput.c @@ -0,0 +1,134 @@ +#include "global.h" +#include "meminput.h" + +void malloc_mem_input(VideoParameters *p_Vid) +{ + if ( (p_Vid->mem_input = (memory_input_t *) calloc(1, sizeof(memory_input_t))) == NULL) + { + snprintf(errortext, ET_SIZE, "Memory allocation for memory input failed"); + error(errortext,100); + } +} + +void free_mem_input(VideoParameters *p_Vid) +{ + free(p_Vid->mem_input); + p_Vid->mem_input = NULL; +} + +/*! +************************************************************************ +* \brief +* returns a byte from IO buffer +************************************************************************ +*/ +static inline uint8_t getfbyte(memory_input_t *mem_input) +{ + return mem_input->user_buffer[mem_input->user_buffer_read++]; +} + + +/*! + ************************************************************************ + * \brief + * returns if new start code is found at byte aligned position buf. + * new-startcode is of form N 0x00 bytes, followed by a 0x01 byte. 
+ * + * \return + * 1 if start-code is found or \n + * 0, indicating that there is no start code + * + * \param Buf + * pointer to byte-stream + * \param zeros_in_startcode + * indicates number of 0x00 bytes in start-code. + ************************************************************************ + */ +static inline int FindStartCode (unsigned char *Buf, int zeros_in_startcode) +{ + int i; + + for (i = 0; i < zeros_in_startcode; i++) + { + if(*(Buf++) != 0) + { + return 0; + } + } + + if(*Buf != 1) + return 0; + + return 1; +} + + +/*! + ************************************************************************ + * \brief + * Returns the size of the NALU (bits between start codes in case of + * Annex B. nalu->buf and nalu->len are filled. Other field in + * nalu-> remain uninitialized (will be taken care of by NALUtoRBSP. + * + * \return + * 0 if there is nothing any more to read (EOF) + * -1 in case of any error + * + * \note Side-effect: Returns length of start-code in bytes. + * + * \note + * GetAnnexbNALU expects start codes at byte aligned positions in the file + * + ************************************************************************ + */ +int GetMemoryNALU (VideoParameters *p_Vid, NALU_t *nalu) +{ + memory_input_t *mem_input = p_Vid->mem_input; + if (!mem_input->user_buffer) + return 0; + nalu->len = mem_input->user_buffer_size; + memcpy(nalu->buf, mem_input->user_buffer, nalu->len); + memzero16(nalu->buf+nalu->len); // add some extra 0's to the end + nalu->forbidden_bit = (*(nalu->buf) >> 7) & 1; + nalu->nal_reference_idc = (NalRefIdc) ((*(nalu->buf) >> 5) & 3); + nalu->nal_unit_type = (NaluType) ((*(nalu->buf)) & 0x1f); + nalu->lost_packets = 0; + mem_input->user_buffer = 0; + + if (mem_input->skip_b_frames && nalu->nal_reference_idc == NALU_PRIORITY_DISPOSABLE) + return 0; + + if (mem_input->resetting && nalu->nal_unit_type != NALU_TYPE_IDR) + return 0; + + mem_input->resetting = 0; + + return 1; +} + + +/*! 
+ ************************************************************************ + * \brief + * Opens the bit stream file named fn + * \return + * none + ************************************************************************ + */ +void OpenMemory(VideoParameters *p_Vid, const char *fn) +{ + memory_input_t *mem_input = p_Vid->mem_input; +} + + +/*! + ************************************************************************ + * \brief + * Closes the bit stream file + ************************************************************************ + */ +void CloseMemory(VideoParameters *p_Vid) +{ + memory_input_t *mem_input = p_Vid->mem_input; +} + diff --git a/Src/h264dec/ldecod/src/nal.c b/Src/h264dec/ldecod/src/nal.c new file mode 100644 index 00000000..73c39474 --- /dev/null +++ b/Src/h264dec/ldecod/src/nal.c @@ -0,0 +1,123 @@ + +/*! + ************************************************************************ + * \file nal.c + * + * \brief + * Converts Encapsulated Byte Sequence Packets (EBSP) to Raw Byte + * Sequence Packets (RBSP), and then onto String Of Data Bits (SODB) + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Shankar L. Regunathan <shanre@microsoft.com> + * - Tobias Oelbaum <oelbaum@drehvial.de> +************************************************************************ + */ + +#include "contributors.h" +#include "global.h" + + /*! + ************************************************************************ + * \brief + * Converts RBSP to string of data bits + * \param streamBuffer + * pointer to buffer containing data + * \param last_byte_pos + * position of the last byte containing data. + * \return last_byte_pos + * position of the last byte pos. If the last-byte was entirely a stuffing byte, + * it is removed, and the last_byte_pos is updated. 
+ * +************************************************************************/ + +int RBSPtoSODB(byte *streamBuffer, int last_byte_pos) +{ + int ctr_bit, bitoffset; + + bitoffset = 0; + //find trailing 1 + ctr_bit = (streamBuffer[last_byte_pos-1] & (0x01<<bitoffset)); // set up control bit + + while (ctr_bit==0) + { // find trailing 1 bit + bitoffset++; + if(bitoffset == 8) + { + if(last_byte_pos == 0) + printf(" Panic: All zero data sequence in RBSP \n"); + assert(last_byte_pos != 0); + last_byte_pos -= 1; + bitoffset = 0; + } + ctr_bit= streamBuffer[last_byte_pos-1] & (0x01<<(bitoffset)); + } + + + // We keep the stop bit for now +/* if (remove_stop) + { + streamBuffer[last_byte_pos-1] -= (0x01<<(bitoffset)); + if(bitoffset == 7) + return(last_byte_pos-1); + else + return(last_byte_pos); + } +*/ + return(last_byte_pos); + +} + + +/*! +************************************************************************ +* \brief +* Converts Encapsulated Byte Sequence Packets to RBSP +* \param streamBuffer +* pointer to data stream +* \param end_bytepos +* size of data stream +* \param begin_bytepos +* Position after beginning +************************************************************************/ + +// TODO: benski> optimize using BitScanReverse +int EBSPtoRBSP(byte *streamBuffer, int end_bytepos) +{ + int i, j, count; + int begin_bytepos = 1; + count = 0; + + + if(end_bytepos < begin_bytepos) + return end_bytepos; + + j = begin_bytepos; + + for(i = begin_bytepos; i < end_bytepos; i++) + { //starting from begin_bytepos to avoid header information + //in NAL unit, 0x000000, 0x000001 or 0x000002 shall not occur at any byte-aligned position + if(count == ZEROBYTES_SHORTSTARTCODE && streamBuffer[i] < 0x03) + return j;//-1; + if(count == ZEROBYTES_SHORTSTARTCODE && streamBuffer[i] == 0x03) + { + //check the 4th byte after 0x000003, except when cabac_zero_word is used, in which case the last three bytes of this NAL unit must be 0x000003 + if((i < end_bytepos-1) && 
(streamBuffer[i+1] > 0x03)) + return -1; + //if cabac_zero_word is used, the final byte of this NAL unit(0x03) is discarded, and the last two bytes of RBSP must be 0x0000 + if(i == end_bytepos-1) + return j; + + i++; + count = 0; + } + streamBuffer[j] = streamBuffer[i]; + if(streamBuffer[i] == 0x00) + count++; + else + count = 0; + j++; + } + + return j; +} diff --git a/Src/h264dec/ldecod/src/nalu.c b/Src/h264dec/ldecod/src/nalu.c new file mode 100644 index 00000000..e70617e3 --- /dev/null +++ b/Src/h264dec/ldecod/src/nalu.c @@ -0,0 +1,162 @@ + +/*! + ************************************************************************ + * \file nalu.c + * + * \brief + * Decoder NALU support functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + ************************************************************************ + */ + +#include "global.h" +#include "nalu.h" +#include "memalloc.h" +#include "meminput.h" + +/*! +************************************************************************************* +* \brief +* Initialize bitstream reading structure +* +* \param +* p_Vid: Imageparameter information +* \param +* filemode: +* +************************************************************************************* +*/ +void OpenMemory(VideoParameters *p_Vid, const char *fn); +void CloseMemory(VideoParameters *p_Vid); +int GetMemoryNALU (VideoParameters *p_Vid, NALU_t *nalu); + +void initBitsFile (VideoParameters *p_Vid) +{ + malloc_mem_input(p_Vid); + p_Vid->nalu = AllocNALU(MAX_CODED_FRAME_SIZE); + +} + +/*! 
+ ************************************************************************************* + * \brief + * Converts a NALU to an RBSP + * + * \param + * nalu: nalu structure to be filled + * + * \return + * length of the RBSP in bytes + ************************************************************************************* + */ + +static int NALUtoRBSP (NALU_t *nalu) +{ + assert (nalu != NULL); + + nalu->len = EBSPtoRBSP (nalu->buf, nalu->len) ; + + return nalu->len ; +} + +/*! +************************************************************************ +* \brief +* Read the next NAL unit (with error handling) +************************************************************************ +*/ +int read_next_nalu(VideoParameters *p_Vid, NALU_t *nalu) +{ + InputParameters *p_Inp = p_Vid->p_Inp; + int ret; + + ret = GetMemoryNALU(p_Vid, nalu); + + if (ret < 0) + { + error ("Error while getting the next NALU, exit\n", 601); + } + if (ret == 0) + { + return 0; + } + + //In some cases, zero_byte shall be present. If current NALU is a VCL NALU, we can't tell + //whether it is the first VCL NALU at this point, so only non-VCL NAL unit is checked here. 
+ CheckZeroByteNonVCL(p_Vid, nalu); + + ret = NALUtoRBSP(nalu); + + if (ret < 0) + error ("Invalid startcode emulation prevention found.", 602); + + + // Got a NALU + if (nalu->forbidden_bit) + { + error ("Found NALU with forbidden_bit set, bit error?", 603); + } + + return nalu->len; +} + +void CheckZeroByteNonVCL(VideoParameters *p_Vid, NALU_t *nalu) +{ + int CheckZeroByte=0; + + //This function deals only with non-VCL NAL units + if(nalu->nal_unit_type>=1&&nalu->nal_unit_type<=5) + return; + + //for SPS and PPS, zero_byte shall exist + if(nalu->nal_unit_type==NALU_TYPE_SPS || nalu->nal_unit_type==NALU_TYPE_PPS) + CheckZeroByte=1; + //check the possibility of the current NALU to be the start of a new access unit, according to 7.4.1.2.3 + if(nalu->nal_unit_type==NALU_TYPE_AUD || nalu->nal_unit_type==NALU_TYPE_SPS || + nalu->nal_unit_type==NALU_TYPE_PPS || nalu->nal_unit_type==NALU_TYPE_SEI || + (nalu->nal_unit_type>=13 && nalu->nal_unit_type<=18)) + { + if(p_Vid->LastAccessUnitExists) + { + p_Vid->LastAccessUnitExists=0; //deliver the last access unit to decoder + p_Vid->NALUCount=0; + } + } + p_Vid->NALUCount++; + //for the first NAL unit in an access unit, zero_byte shall exists + if(p_Vid->NALUCount==1) + CheckZeroByte=1; + if(CheckZeroByte && nalu->startcodeprefix_len==3) + { + // printf("Warning: zero_byte shall exist\n"); + //because it is not a very serious problem, we do not exit here + } +} + +void CheckZeroByteVCL(VideoParameters *p_Vid, NALU_t *nalu) +{ + int CheckZeroByte=0; + + //This function deals only with VCL NAL units + if(!(nalu->nal_unit_type>=1&&nalu->nal_unit_type<=5)) + return; + + if(p_Vid->LastAccessUnitExists) + { + p_Vid->NALUCount=0; + } + p_Vid->NALUCount++; + //the first VCL NAL unit that is the first NAL unit after last VCL NAL unit indicates + //the start of a new access unit and hence the first NAL unit of the new access unit. 
(sounds like a tongue twister :-) + if(p_Vid->NALUCount == 1) + CheckZeroByte = 1; + p_Vid->LastAccessUnitExists = 1; + if(CheckZeroByte && nalu->startcodeprefix_len==3) + { + //printf("warning: zero_byte shall exist\n"); + //because it is not a very serious problem, we do not exit here + } +} diff --git a/Src/h264dec/ldecod/src/nalucommon.c b/Src/h264dec/ldecod/src/nalucommon.c new file mode 100644 index 00000000..fe900722 --- /dev/null +++ b/Src/h264dec/ldecod/src/nalucommon.c @@ -0,0 +1,73 @@ + +/*! + ************************************************************************ + * \file nalucommon.c + * + * \brief + * Common NALU support functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + ************************************************************************ + */ + +#include "global.h" +#include "nalu.h" +#include "memalloc.h" +#include <bfc/platform/types.h> + +/*! + ************************************************************************************* + * \brief + * Allocates memory for a NALU + * + * \param buffersize + * size of NALU buffer + * + * \return + * pointer to a NALU + ************************************************************************************* + */ +NALU_t *AllocNALU(int buffersize) +{ + NALU_t *n; + + if ((n = (NALU_t*)calloc (1, sizeof (NALU_t))) == NULL) + return 0; + + n->max_size=buffersize; + + if ((n->buf = (uint8_t *)_aligned_malloc(buffersize, 32)) == NULL) + { + free (n); + return 0; + } + memset(n->buf, 0, buffersize); + + return n; +} + + +/*! 
+ ************************************************************************************* + * \brief + * Frees a NALU + * + * \param n + * NALU to be freed + * + ************************************************************************************* + */ +void FreeNALU(NALU_t *n) +{ + if (n != NULL) + { + if (n->buf != NULL) + { + _aligned_free(n->buf); + n->buf=NULL; + } + free (n); + } +} diff --git a/Src/h264dec/ldecod/src/output.c b/Src/h264dec/ldecod/src/output.c new file mode 100644 index 00000000..74576af0 --- /dev/null +++ b/Src/h264dec/ldecod/src/output.c @@ -0,0 +1,599 @@ + +/*! + ************************************************************************ + * \file output.c + * + * \brief + * Output an image and Trance support + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Suehring <suehring@hhi.de> + ************************************************************************ + */ + +#include "contributors.h" + +#include "global.h" +#include "mbuffer.h" +#include "image.h" +#include "memalloc.h" +#include "sei.h" + +static void write_out_picture(VideoParameters *p_Vid, StorablePicture *p); + + +#if (PAIR_FIELDS_IN_OUTPUT) + +void clear_picture(VideoParameters *p_Vid, StorablePicture *p); + +/*! + ************************************************************************ + * \brief + * output the pending frame buffer + * \param p_out + * Output file + ************************************************************************ + */ +void flush_pending_output(VideoParameters *p_Vid) +{ + if (p_Vid->pending_output_state != FRAME) + { + write_out_picture(p_Vid, p_Vid->pending_output); + } + + if (p_Vid->pending_output->imgY) + { + free_mem2Dpel (p_Vid->pending_output->imgY); + p_Vid->pending_output->imgY=NULL; + } + if (p_Vid->pending_output->imgUV) + { + free_mem3Dpel (p_Vid->pending_output->imgUV); + p_Vid->pending_output->imgUV=NULL; + } + + p_Vid->pending_output_state = FRAME; +} + + +/*! 
+ ************************************************************************ + * \brief + * Writes out a storable picture + * If the picture is a field, the output buffers the picture and tries + * to pair it with the next field. + * \param p + * Picture to be written + * \param p_out + * Output file + ************************************************************************ + */ +void write_picture(VideoParameters *p_Vid, StorablePicture *p, int real_structure) +{ + int i, add; + + if (real_structure==FRAME) + { + flush_pending_output(p_Vid); + write_out_picture(p_Vid, p); + return; + } + if (real_structure == p_Vid->pending_output_state) + { + flush_pending_output(p_Vid); + write_picture(p_Vid, p, real_structure); + return; + } + + if (p_Vid->pending_output_state == FRAME) + { + p_Vid->pending_output->size_x = p->size_x; + p_Vid->pending_output->size_y = p->size_y; + p_Vid->pending_output->size_x_cr = p->size_x_cr; + p_Vid->pending_output->size_y_cr = p->size_y_cr; + p_Vid->pending_output->chroma_format_idc = p->chroma_format_idc; + + p_Vid->pending_output->frame_mbs_only_flag = p->frame_mbs_only_flag; + p_Vid->pending_output->frame_cropping_flag = p->frame_cropping_flag; + if (p_Vid->pending_output->frame_cropping_flag) + { + p_Vid->pending_output->frame_cropping_rect_left_offset = p->frame_cropping_rect_left_offset; + p_Vid->pending_output->frame_cropping_rect_right_offset = p->frame_cropping_rect_right_offset; + p_Vid->pending_output->frame_cropping_rect_top_offset = p->frame_cropping_rect_top_offset; + p_Vid->pending_output->frame_cropping_rect_bottom_offset = p->frame_cropping_rect_bottom_offset; + } + + get_mem2Dpel (&(p_Vid->pending_output->imgY), p_Vid->pending_output->size_y, p_Vid->pending_output->size_x); + get_mem3Dpel (&(p_Vid->pending_output->imgUV), 2, p_Vid->pending_output->size_y_cr, p_Vid->pending_output->size_x_cr); + + clear_picture(p_Vid, p_Vid->pending_output); + + // copy first field + if (real_structure == TOP_FIELD) + { + add = 0; + } + else 
+ { + add = 1; + } + + for (i=0; i<p_Vid->pending_output->size_y; i+=2) + { + memcpy(p_Vid->pending_output->imgY[(i+add)], p->imgY[(i+add)], p->size_x * sizeof(imgpel)); + } + for (i=0; i<p_Vid->pending_output->size_y_cr; i+=2) + { + memcpy(p_Vid->pending_output->imgUV[0][(i+add)], p->imgUV[0][(i+add)], p->size_x_cr * sizeof(imgpel)); + memcpy(p_Vid->pending_output->imgUV[1][(i+add)], p->imgUV[1][(i+add)], p->size_x_cr * sizeof(imgpel)); + } + p_Vid->pending_output_state = real_structure; + } + else + { + if ( (p_Vid->pending_output->size_x!=p->size_x) || (p_Vid->pending_output->size_y!= p->size_y) + || (p_Vid->pending_output->frame_mbs_only_flag != p->frame_mbs_only_flag) + || (p_Vid->pending_output->frame_cropping_flag != p->frame_cropping_flag) + || ( p_Vid->pending_output->frame_cropping_flag && + ( (p_Vid->pending_output->frame_cropping_rect_left_offset != p->frame_cropping_rect_left_offset) + ||(p_Vid->pending_output->frame_cropping_rect_right_offset != p->frame_cropping_rect_right_offset) + ||(p_Vid->pending_output->frame_cropping_rect_top_offset != p->frame_cropping_rect_top_offset) + ||(p_Vid->pending_output->frame_cropping_rect_bottom_offset != p->frame_cropping_rect_bottom_offset) + ) + ) + ) + { + flush_pending_output(p_Vid); + write_picture (p_Vid, p, real_structure); + return; + } + // copy second field + if (real_structure == TOP_FIELD) + { + add = 0; + } + else + { + add = 1; + } + + for (i=0; i<p_Vid->pending_output->size_y; i+=2) + { + memcpy(p_Vid->pending_output->imgY[(i+add)], p->imgY[(i+add)], p->size_x * sizeof(imgpel)); + } + for (i=0; i<p_Vid->pending_output->size_y_cr; i+=2) + { + memcpy(p_Vid->pending_output->imgUV[0][(i+add)], p->imgUV[0][(i+add)], p->size_x_cr * sizeof(imgpel)); + memcpy(p_Vid->pending_output->imgUV[1][(i+add)], p->imgUV[1][(i+add)], p->size_x_cr * sizeof(imgpel)); + } + + p_Vid->pending_output->time_code = p->time_code; + flush_pending_output(p_Vid); + } +} + +#else + +/*! 
+ ************************************************************************ + * \brief + * Writes out a storable picture without doing any output modifications + * + * \param p_Vid + * image decoding parameters for current picture + * \param p + * Picture to be written + * \param p_out + * Output file + * \param real_structure + * real picture structure + ************************************************************************ + */ +static void write_picture(VideoParameters *p_Vid, StorablePicture *p, int real_structure) +{ + write_out_picture(p_Vid, p); +} + + +#endif + +/*! +************************************************************************ +* \brief +* Writes out a storable picture +* +* \param p_Vid +* image decoding parameters for current picture +* \param p +* Picture to be written +* \param p_out +* Output file +************************************************************************ +*/ +static void write_out_picture(VideoParameters *p_Vid, StorablePicture *p) +{ +#if 0 + InputParameters *p_Inp = p_Vid->p_Inp; + + static const int SubWidthC [4]= { 1, 2, 2, 1}; + static const int SubHeightC [4]= { 1, 2, 1, 1}; + + int crop_left, crop_right, crop_top, crop_bottom; + int symbol_size_in_bytes = (p_Vid->pic_unit_bitsize_on_disk >> 3); + Boolean rgb_output = (Boolean) (p_Vid->active_sps->vui_seq_parameters.matrix_coefficients==0); + unsigned char *buf; + + int ret; + + if (p->non_existing) + return; + + printf("*** Outputting poc %d, frame_num %d, frame_poc %d, pic_num %d\n", p->poc, p->frame_num, p->frame_poc, p->pic_num); + +#if (ENABLE_OUTPUT_TONEMAPPING) + // note: this tone-mapping is working for RGB format only. Sharp + if (p->seiHasTone_mapping && rgb_output) + { + //printf("output frame %d with tone model id %d\n", p->frame_num, p->tone_mapping_model_id); + symbol_size_in_bytes = (p->tonemapped_bit_depth>8)? 
2 : 1; + tone_map(p->imgY, p->tone_mapping_lut, p->size_x, p->size_y); + tone_map(p->imgUV[0], p->tone_mapping_lut, p->size_x_cr, p->size_y_cr); + tone_map(p->imgUV[1], p->tone_mapping_lut, p->size_x_cr, p->size_y_cr); + } +#endif + + if (p->frame_cropping_flag) + { + crop_left = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset; + crop_right = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset; + crop_top = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + } + else + { + crop_left = crop_right = crop_top = crop_bottom = 0; + } + + //printf ("write frame size: %dx%d\n", p->size_x-crop_left-crop_right,p->size_y-crop_top-crop_bottom ); + initOutput(p_Vid, symbol_size_in_bytes); + + // KS: this buffer should actually be allocated only once, but this is still much faster than the previous version + buf = malloc (p->size_x*p->size_y*symbol_size_in_bytes); + if (NULL==buf) + { + no_mem_exit("write_out_picture: buf"); + } + + if(rgb_output) + { + crop_left = p->frame_cropping_rect_left_offset; + crop_right = p->frame_cropping_rect_right_offset; + crop_top = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + + p_Vid->img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom); + ret = write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes); + if (ret != ((p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes)) + { + error ("write_out_picture: error writing to RGB file", 500); + } + + if (p->frame_cropping_flag) + { + crop_left = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset; + 
crop_right = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset; + crop_top = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + } + else + { + crop_left = crop_right = crop_top = crop_bottom = 0; + } + } + // write Y + p_Vid->img2buf (p->imgY, buf, p->size_x, p->size_y, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom); + ret = write(p_out, buf, (p->size_y-crop_bottom-crop_top)*(p->size_x-crop_right-crop_left)*symbol_size_in_bytes); + if (ret != ((p->size_y-crop_bottom-crop_top)*(p->size_x-crop_right-crop_left)*symbol_size_in_bytes)) + { + error ("write_out_picture: error writing to YUV file", 500); + } + + if (p->chroma_format_idc!=YUV400) + { + crop_left = p->frame_cropping_rect_left_offset; + crop_right = p->frame_cropping_rect_right_offset; + crop_top = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + + p_Vid->img2buf (p->imgUV[0], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom); + ret = write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)* symbol_size_in_bytes); + if (ret != ((p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)* symbol_size_in_bytes)) + { + error ("write_out_picture: error writing to YUV file", 500); + } + if (!rgb_output) + { + p_Vid->img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom); + ret = write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes); + if (ret != ((p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes)) + { + error ("write_out_picture: error 
writing to YUV file", 500); + } + } + } + else + { + if (p_Inp->write_uv) + { + int i,j; + imgpel cr_val = (imgpel) (1<<(p_Vid->bitdepth_luma - 1)); + + get_mem3Dpel (&(p->imgUV), 1, p->size_y/2, p->size_x/2); + for (j=0; j<p->size_y/2; j++) + for (i=0; i<p->size_x/2; i++) + p->imgUV[0][j][i]=cr_val; + + // fake out U=V=128 to make a YUV 4:2:0 stream + p_Vid->img2buf (p->imgUV[0], buf, p->size_x/2, p->size_y/2, symbol_size_in_bytes, crop_left/2, crop_right/2, crop_top/2, crop_bottom/2); + + ret = write(p_out, buf, symbol_size_in_bytes * (p->size_y-crop_bottom-crop_top)/2 * (p->size_x-crop_right-crop_left)/2 ); + if (ret != (symbol_size_in_bytes * (p->size_y-crop_bottom-crop_top)/2 * (p->size_x-crop_right-crop_left)/2)) + { + error ("write_out_picture: error writing to YUV file", 500); + } + ret = write(p_out, buf, symbol_size_in_bytes * (p->size_y-crop_bottom-crop_top)/2 * (p->size_x-crop_right-crop_left)/2 ); + if (ret != (symbol_size_in_bytes * (p->size_y-crop_bottom-crop_top)/2 * (p->size_x-crop_right-crop_left)/2)) + { + error ("write_out_picture: error writing to YUV file", 500); + } + + free_mem3Dpel(p->imgUV); + p->imgUV=NULL; + } + } + + free(buf); +#endif + if (p) + { + p->retain_count++; + out_storable_picture_add(p_Vid, p); + free_storable_picture(p_Vid, p); // release the reference we added above (out_storable_picture will add its own) + } +// fsync(p_out); +} + +/*! + ************************************************************************ + * \brief + * Initialize output buffer for direct output + ************************************************************************ + */ +void init_out_buffer(VideoParameters *p_Vid) +{ + p_Vid->out_buffer = alloc_frame_store(); + +#if (PAIR_FIELDS_IN_OUTPUT) + p_Vid->pending_output = calloc (sizeof(StorablePicture), 1); + if (NULL==p_Vid->pending_output) no_mem_exit("init_out_buffer"); + p_Vid->pending_output->imgUV = NULL; + p_Vid->pending_output->imgY = NULL; +#endif +} + +/*! 
+ ************************************************************************ + * \brief + * Uninitialize output buffer for direct output + ************************************************************************ + */ +void uninit_out_buffer(VideoParameters *p_Vid) +{ + free_frame_store(p_Vid, p_Vid->out_buffer); + p_Vid->out_buffer=NULL; +#if (PAIR_FIELDS_IN_OUTPUT) + flush_pending_output(p_Vid); + free (p_Vid->pending_output); +#endif +} + +/*! + ************************************************************************ + * \brief + * Initialize picture memory with (Y:0,U:128,V:128) + ************************************************************************ + */ +void clear_picture(VideoParameters *p_Vid, StorablePicture *p) +{ + int i,j; + + for(i=0;i<p->size_y;i++) + { + for (j=0; j<p->size_x; j++) + p->imgY->img[i][j] = (imgpel) p_Vid->dc_pred_value_comp[0]; + } + for(i=0;i<p->size_y_cr;i++) + { + for (j=0; j<p->size_x_cr; j++) + p->imgUV[0]->img[i][j] = (imgpel) p_Vid->dc_pred_value_comp[1]; + } + for(i=0;i<p->size_y_cr;i++) + { + for (j=0; j<p->size_x_cr; j++) + p->imgUV[1]->img[i][j] = (imgpel) p_Vid->dc_pred_value_comp[2]; + } +} + +/*! + ************************************************************************ + * \brief + * Write out not paired direct output fields. A second empty field is generated + * and combined into the frame buffer. 
+ * + * \param p_Vid + * image decoding parameters for current picture + * \param fs + * FrameStore that contains a single field + * \param p_out + * Output file + ************************************************************************ + */ +static void write_unpaired_field(VideoParameters *p_Vid, FrameStore* fs) +{ + StorablePicture *p; + assert (fs->is_used<3); + + if(fs->is_used & 0x01) + { + // we have a top field + // construct an empty bottom field + p = fs->top_field; + fs->bottom_field = alloc_storable_picture(p_Vid, BOTTOM_FIELD, p->size_x, 2*p->size_y, p->size_x_cr, 2*p->size_y_cr); + fs->bottom_field->chroma_format_idc = p->chroma_format_idc; + clear_picture(p_Vid, fs->bottom_field); + dpb_combine_field_yuv(p_Vid, fs); + write_picture (p_Vid, fs->frame, TOP_FIELD); + } + + if(fs->is_used & 0x02) + { + // we have a bottom field + // construct an empty top field + p = fs->bottom_field; + fs->top_field = alloc_storable_picture(p_Vid, TOP_FIELD, p->size_x, 2*p->size_y, p->size_x_cr, 2*p->size_y_cr); + fs->top_field->chroma_format_idc = p->chroma_format_idc; + clear_picture(p_Vid, fs->top_field); + fs ->top_field->frame_cropping_flag = fs->bottom_field->frame_cropping_flag; + if(fs ->top_field->frame_cropping_flag) + { + fs ->top_field->frame_cropping_rect_top_offset = fs->bottom_field->frame_cropping_rect_top_offset; + fs ->top_field->frame_cropping_rect_bottom_offset = fs->bottom_field->frame_cropping_rect_bottom_offset; + fs ->top_field->frame_cropping_rect_left_offset = fs->bottom_field->frame_cropping_rect_left_offset; + fs ->top_field->frame_cropping_rect_right_offset = fs->bottom_field->frame_cropping_rect_right_offset; + } + dpb_combine_field_yuv(p_Vid, fs); + write_picture (p_Vid, fs->frame, BOTTOM_FIELD); + } + + fs->is_used = 3; +} + +/*! + ************************************************************************ + * \brief + * Write out unpaired fields from output buffer. 
+ * + * \param p_Vid + * image decoding parameters for current picture + * \param p_out + * Output file + ************************************************************************ + */ +static void flush_direct_output(VideoParameters *p_Vid) +{ + write_unpaired_field(p_Vid, p_Vid->out_buffer); + + free_storable_picture(p_Vid, p_Vid->out_buffer->frame); + p_Vid->out_buffer->frame = NULL; + free_storable_picture(p_Vid, p_Vid->out_buffer->top_field); + p_Vid->out_buffer->top_field = NULL; + free_storable_picture(p_Vid, p_Vid->out_buffer->bottom_field); + p_Vid->out_buffer->bottom_field = NULL; + p_Vid->out_buffer->is_used = 0; +} + + +/*! + ************************************************************************ + * \brief + * Write a frame (from FrameStore) + * + * \param p_Vid + * image decoding parameters for current picture + * \param fs + * FrameStore containing the frame + * \param p_out + * Output file + ************************************************************************ + */ +void write_stored_frame( VideoParameters *p_Vid, FrameStore *fs) +{ + // make sure no direct output field is pending + flush_direct_output(p_Vid); + + if (fs->is_used<3) + { + write_unpaired_field(p_Vid, fs); + } + else + { + if (fs->recovery_frame) + p_Vid->recovery_flag = 1; + if ((!p_Vid->non_conforming_stream) || p_Vid->recovery_flag) + write_picture(p_Vid, fs->frame, FRAME); + } + + fs->is_output = 1; +} + +/*! + ************************************************************************ + * \brief + * Directly output a picture without storing it in the DPB. Fields + * are buffered before they are written to the file. 
+ * + * \param p_Vid + * image decoding parameters for current picture + * \param p + * Picture for output + * \param p_out + * Output file + ************************************************************************ + */ +void direct_output(VideoParameters *p_Vid, StorablePicture *p) +{ + InputParameters *p_Inp = p_Vid->p_Inp; + if (p->structure==FRAME) + { + // we have a frame (or complementary field pair) + // so output it directly + flush_direct_output(p_Vid); + write_picture (p_Vid, p, FRAME); + free_storable_picture(p_Vid, p); + return; + } + + if (p->structure == TOP_FIELD) + { + if (p_Vid->out_buffer->is_used &1) + flush_direct_output(p_Vid); + p_Vid->out_buffer->top_field = p; + p_Vid->out_buffer->is_used |= 1; + } + + if (p->structure == BOTTOM_FIELD) + { + if (p_Vid->out_buffer->is_used &2) + flush_direct_output(p_Vid); + p_Vid->out_buffer->bottom_field = p; + p_Vid->out_buffer->is_used |= 2; + } + + if (p_Vid->out_buffer->is_used == 3) + { + // we have both fields, so output them + dpb_combine_field_yuv(p_Vid, p_Vid->out_buffer); + p_Vid->out_buffer->frame->time_code = p->time_code; + write_picture (p_Vid, p_Vid->out_buffer->frame, FRAME); + + free_storable_picture(p_Vid, p_Vid->out_buffer->frame); + p_Vid->out_buffer->frame = NULL; + free_storable_picture(p_Vid, p_Vid->out_buffer->top_field); + p_Vid->out_buffer->top_field = NULL; + free_storable_picture(p_Vid, p_Vid->out_buffer->bottom_field); + p_Vid->out_buffer->bottom_field = NULL; + p_Vid->out_buffer->is_used = 0; + } +} + diff --git a/Src/h264dec/ldecod/src/parset.c b/Src/h264dec/ldecod/src/parset.c new file mode 100644 index 00000000..1b49cbd8 --- /dev/null +++ b/Src/h264dec/ldecod/src/parset.c @@ -0,0 +1,779 @@ + +/*! 
+ ************************************************************************ + * \file + * parset.c + * \brief + * Parameter Sets + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger <stewe@cs.tu-berlin.de> + * + *********************************************************************** + */ + +#include "global.h" +#include "image.h" +#include "parsetcommon.h" +#include "parset.h" +#include "nalu.h" +#include "memalloc.h" +#include "fmo.h" +#include "cabac.h" +#include "vlc.h" +#include "mbuffer.h" +#include "erc_api.h" + +#if TRACE +#define SYMTRACESTRING(s) strncpy(sym->tracestring,s,TRACESTRING_SIZE) +#else +#define SYMTRACESTRING(s) // do nothing +#endif + + +extern void init_frext(VideoParameters *p_Vid); + +// syntax for scaling list matrix values +void Scaling_List(int *scalingList, int sizeOfScalingList, Boolean *UseDefaultScalingMatrix, Bitstream *s) +{ + int j, scanj; + int delta_scale, lastScale, nextScale; + + lastScale = 8; + nextScale = 8; + + for(j=0; j<sizeOfScalingList; j++) + { + scanj = (sizeOfScalingList==16) ? ZZ_SCAN[j]:ZZ_SCAN8[j]; + + if(nextScale!=0) + { + delta_scale = se_v ( " : delta_sl " , s); + nextScale = (lastScale + delta_scale + 256) % 256; + *UseDefaultScalingMatrix = (Boolean) (scanj==0 && nextScale==0); + } + + scalingList[scanj] = (nextScale==0) ? 
lastScale:nextScale; + lastScale = scalingList[scanj]; + } +} +// fill sps with content of p + +static void InterpretSPS (VideoParameters *p_Vid, DataPartition *p, seq_parameter_set_rbsp_t *sps) +{ + unsigned i; + unsigned n_ScalingList; + int reserved_zero; + Bitstream *s = p->bitstream; + + assert (p != NULL); + assert (p->bitstream != NULL); + assert (p->bitstream->streamBuffer != 0); + assert (sps != NULL); + + sps->profile_idc = u_v (8, "SPS: profile_idc" , s); + + if ((sps->profile_idc!=BASELINE ) && + (sps->profile_idc!=MAIN ) && + (sps->profile_idc!=EXTENDED ) && + (sps->profile_idc!=FREXT_HP ) && + (sps->profile_idc!=FREXT_Hi10P ) && + (sps->profile_idc!=FREXT_Hi422 ) && + (sps->profile_idc!=FREXT_Hi444 ) && + (sps->profile_idc!=FREXT_CAVLC444 )) + { + printf("Invalid Profile IDC (%d) encountered. \n", sps->profile_idc); + return; + } + + sps->constrained_set0_flag = u_1 ( "SPS: constrained_set0_flag" , s); + sps->constrained_set1_flag = u_1 ( "SPS: constrained_set1_flag" , s); + sps->constrained_set2_flag = u_1 ( "SPS: constrained_set2_flag" , s); + sps->constrained_set3_flag = u_1 ( "SPS: constrained_set3_flag" , s); + reserved_zero = u_v (4, "SPS: reserved_zero_4bits" , s); + assert (reserved_zero==0); + + sps->level_idc = u_v (8, "SPS: level_idc" , s); + + sps->seq_parameter_set_id = ue_v ("SPS: seq_parameter_set_id" , s); + + // Fidelity Range Extensions stuff + sps->chroma_format_idc = 1; + sps->bit_depth_luma_minus8 = 0; + sps->bit_depth_chroma_minus8 = 0; + p_Vid->lossless_qpprime_flag = 0; + sps->separate_colour_plane_flag = 0; + + if((IS_FREXT_PROFILE(sps->profile_idc)))/*==FREXT_HP ) || + (sps->profile_idc==FREXT_Hi10P) || + (sps->profile_idc==FREXT_Hi422) || + (sps->profile_idc==FREXT_Hi444) || + (sps->profile_idc==FREXT_CAVLC444))*/ + { + sps->chroma_format_idc = ue_v ("SPS: chroma_format_idc" , s); + + if(sps->chroma_format_idc == YUV444) + { + sps->separate_colour_plane_flag = u_1 ("SPS: separate_colour_plane_flag" , s); + } + + 
sps->bit_depth_luma_minus8 = ue_v ("SPS: bit_depth_luma_minus8" , s); + sps->bit_depth_chroma_minus8 = ue_v ("SPS: bit_depth_chroma_minus8" , s); + p_Vid->lossless_qpprime_flag = u_1 ("SPS: lossless_qpprime_y_zero_flag" , s); + + sps->seq_scaling_matrix_present_flag = u_1 ( "SPS: seq_scaling_matrix_present_flag" , s); + + if(sps->seq_scaling_matrix_present_flag) + { + n_ScalingList = (sps->chroma_format_idc != YUV444) ? 8 : 12; + for(i=0; i<n_ScalingList; i++) + { + sps->seq_scaling_list_present_flag[i] = u_1 ( "SPS: seq_scaling_list_present_flag" , s); + if(sps->seq_scaling_list_present_flag[i]) + { + if(i<6) + Scaling_List(sps->ScalingList4x4[i], 16, &sps->UseDefaultScalingMatrix4x4Flag[i], s); + else + Scaling_List(sps->ScalingList8x8[i-6], 64, &sps->UseDefaultScalingMatrix8x8Flag[i-6], s); + } + } + } + } + + sps->log2_max_frame_num_minus4 = ue_v ("SPS: log2_max_frame_num_minus4" , s); + sps->pic_order_cnt_type = ue_v ("SPS: pic_order_cnt_type" , s); + + if (sps->pic_order_cnt_type == 0) + sps->log2_max_pic_order_cnt_lsb_minus4 = ue_v ("SPS: log2_max_pic_order_cnt_lsb_minus4" , s); + else if (sps->pic_order_cnt_type == 1) + { + sps->delta_pic_order_always_zero_flag = u_1 ("SPS: delta_pic_order_always_zero_flag" , s); + sps->offset_for_non_ref_pic = se_v ("SPS: offset_for_non_ref_pic" , s); + sps->offset_for_top_to_bottom_field = se_v ("SPS: offset_for_top_to_bottom_field" , s); + sps->num_ref_frames_in_pic_order_cnt_cycle = ue_v ("SPS: num_ref_frames_in_pic_order_cnt_cycle" , s); + for(i=0; i<sps->num_ref_frames_in_pic_order_cnt_cycle; i++) + sps->offset_for_ref_frame[i] = se_v ("SPS: offset_for_ref_frame[i]" , s); + } + sps->num_ref_frames = ue_v ("SPS: num_ref_frames" , s); + sps->gaps_in_frame_num_value_allowed_flag = u_1 ("SPS: gaps_in_frame_num_value_allowed_flag" , s); + sps->pic_width_in_mbs_minus1 = ue_v ("SPS: pic_width_in_mbs_minus1" , s); + sps->pic_height_in_map_units_minus1 = ue_v ("SPS: pic_height_in_map_units_minus1" , s); + 
sps->frame_mbs_only_flag = u_1 ("SPS: frame_mbs_only_flag" , s); + if (!sps->frame_mbs_only_flag) + { + sps->mb_adaptive_frame_field_flag = u_1 ("SPS: mb_adaptive_frame_field_flag" , s); + } + sps->direct_8x8_inference_flag = u_1 ("SPS: direct_8x8_inference_flag" , s); + sps->frame_cropping_flag = u_1 ("SPS: frame_cropping_flag" , s); + + if (sps->frame_cropping_flag) + { + sps->frame_cropping_rect_left_offset = ue_v ("SPS: frame_cropping_rect_left_offset" , s); + sps->frame_cropping_rect_right_offset = ue_v ("SPS: frame_cropping_rect_right_offset" , s); + sps->frame_cropping_rect_top_offset = ue_v ("SPS: frame_cropping_rect_top_offset" , s); + sps->frame_cropping_rect_bottom_offset = ue_v ("SPS: frame_cropping_rect_bottom_offset" , s); + } + sps->vui_parameters_present_flag = (Boolean) u_1 ("SPS: vui_parameters_present_flag" , s); + + InitVUI(sps); + ReadVUI(p, sps); + + sps->Valid = TRUE; +} + + +void InitVUI(seq_parameter_set_rbsp_t *sps) +{ + sps->vui_seq_parameters.matrix_coefficients = 2; +} + + +int ReadVUI(DataPartition *p, seq_parameter_set_rbsp_t *sps) +{ + Bitstream *s = p->bitstream; + if (sps->vui_parameters_present_flag) + { + sps->vui_seq_parameters.aspect_ratio_info_present_flag = u_1 ("VUI: aspect_ratio_info_present_flag" , s); + if (sps->vui_seq_parameters.aspect_ratio_info_present_flag) + { + sps->vui_seq_parameters.aspect_ratio_idc = u_v ( 8, "VUI: aspect_ratio_idc" , s); + if (255==sps->vui_seq_parameters.aspect_ratio_idc) + { + sps->vui_seq_parameters.sar_width = (unsigned short) u_v (16, "VUI: sar_width" , s); + sps->vui_seq_parameters.sar_height = (unsigned short) u_v (16, "VUI: sar_height" , s); + } + } + + sps->vui_seq_parameters.overscan_info_present_flag = u_1 ("VUI: overscan_info_present_flag" , s); + if (sps->vui_seq_parameters.overscan_info_present_flag) + { + sps->vui_seq_parameters.overscan_appropriate_flag = u_1 ("VUI: overscan_appropriate_flag" , s); + } + + sps->vui_seq_parameters.video_signal_type_present_flag = u_1 ("VUI: 
video_signal_type_present_flag" , s); + if (sps->vui_seq_parameters.video_signal_type_present_flag) + { + sps->vui_seq_parameters.video_format = u_v ( 3,"VUI: video_format" , s); + sps->vui_seq_parameters.video_full_range_flag = u_1 ( "VUI: video_full_range_flag" , s); + sps->vui_seq_parameters.colour_description_present_flag = u_1 ( "VUI: color_description_present_flag" , s); + if(sps->vui_seq_parameters.colour_description_present_flag) + { + sps->vui_seq_parameters.colour_primaries = u_v ( 8,"VUI: colour_primaries" , s); + sps->vui_seq_parameters.transfer_characteristics = u_v ( 8,"VUI: transfer_characteristics" , s); + sps->vui_seq_parameters.matrix_coefficients = u_v ( 8,"VUI: matrix_coefficients" , s); + } + } + sps->vui_seq_parameters.chroma_location_info_present_flag = u_1 ( "VUI: chroma_loc_info_present_flag" , s); + if(sps->vui_seq_parameters.chroma_location_info_present_flag) + { + sps->vui_seq_parameters.chroma_sample_loc_type_top_field = ue_v ( "VUI: chroma_sample_loc_type_top_field" , s); + sps->vui_seq_parameters.chroma_sample_loc_type_bottom_field = ue_v ( "VUI: chroma_sample_loc_type_bottom_field" , s); + } + sps->vui_seq_parameters.timing_info_present_flag = u_1 ("VUI: timing_info_present_flag" , s); + if (sps->vui_seq_parameters.timing_info_present_flag) + { + sps->vui_seq_parameters.num_units_in_tick = u_v (32,"VUI: num_units_in_tick" , s); + sps->vui_seq_parameters.time_scale = u_v (32,"VUI: time_scale" , s); + sps->vui_seq_parameters.fixed_frame_rate_flag = u_1 ( "VUI: fixed_frame_rate_flag" , s); + } + sps->vui_seq_parameters.nal_hrd_parameters_present_flag = u_1 ("VUI: nal_hrd_parameters_present_flag" , s); + if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag) + { + ReadHRDParameters(p, &(sps->vui_seq_parameters.nal_hrd_parameters)); + } + sps->vui_seq_parameters.vcl_hrd_parameters_present_flag = u_1 ("VUI: vcl_hrd_parameters_present_flag" , s); + if (sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + ReadHRDParameters(p, 
&(sps->vui_seq_parameters.vcl_hrd_parameters)); + } + if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag || sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + sps->vui_seq_parameters.low_delay_hrd_flag = u_1 ("VUI: low_delay_hrd_flag" , s); + } + sps->vui_seq_parameters.pic_struct_present_flag = u_1 ("VUI: pic_struct_present_flag " , s); + sps->vui_seq_parameters.bitstream_restriction_flag = u_1 ("VUI: bitstream_restriction_flag" , s); + if (sps->vui_seq_parameters.bitstream_restriction_flag) + { + sps->vui_seq_parameters.motion_vectors_over_pic_boundaries_flag = u_1 ("VUI: motion_vectors_over_pic_boundaries_flag", s); + sps->vui_seq_parameters.max_bytes_per_pic_denom = ue_v ("VUI: max_bytes_per_pic_denom" , s); + sps->vui_seq_parameters.max_bits_per_mb_denom = ue_v ("VUI: max_bits_per_mb_denom" , s); + sps->vui_seq_parameters.log2_max_mv_length_horizontal = ue_v ("VUI: log2_max_mv_length_horizontal" , s); + sps->vui_seq_parameters.log2_max_mv_length_vertical = ue_v ("VUI: log2_max_mv_length_vertical" , s); + sps->vui_seq_parameters.num_reorder_frames = ue_v ("VUI: num_reorder_frames" , s); + sps->vui_seq_parameters.max_dec_frame_buffering = ue_v ("VUI: max_dec_frame_buffering" , s); + } + } + + return 0; +} + + +int ReadHRDParameters(DataPartition *p, hrd_parameters_t *hrd) +{ + Bitstream *s = p->bitstream; + unsigned int SchedSelIdx; + + hrd->cpb_cnt_minus1 = ue_v ( "VUI: cpb_cnt_minus1" , s); + hrd->bit_rate_scale = u_v ( 4,"VUI: bit_rate_scale" , s); + hrd->cpb_size_scale = u_v ( 4,"VUI: cpb_size_scale" , s); + + for( SchedSelIdx = 0; SchedSelIdx <= hrd->cpb_cnt_minus1; SchedSelIdx++ ) + { + hrd->bit_rate_value_minus1[ SchedSelIdx ] = ue_v ( "VUI: bit_rate_value_minus1" , s); + hrd->cpb_size_value_minus1[ SchedSelIdx ] = ue_v ( "VUI: cpb_size_value_minus1" , s); + hrd->cbr_flag[ SchedSelIdx ] = u_1 ( "VUI: cbr_flag" , s); + } + + hrd->initial_cpb_removal_delay_length_minus1 = u_v ( 5,"VUI: initial_cpb_removal_delay_length_minus1" , s); + 
hrd->cpb_removal_delay_length_minus1 = u_v ( 5,"VUI: cpb_removal_delay_length_minus1" , s); + hrd->dpb_output_delay_length_minus1 = u_v ( 5,"VUI: dpb_output_delay_length_minus1" , s); + hrd->time_offset_length = u_v ( 5,"VUI: time_offset_length" , s); + + return 0; +} + + +static void InterpretPPS (VideoParameters *p_Vid, DataPartition *p, pic_parameter_set_rbsp_t *pps) +{ + unsigned i; + unsigned n_ScalingList; + int chroma_format_idc; + int NumberBitsPerSliceGroupId; + Bitstream *s = p->bitstream; + + assert (p != NULL); + assert (p->bitstream != NULL); + assert (p->bitstream->streamBuffer != 0); + assert (pps != NULL); + + pps->pic_parameter_set_id = ue_v ("PPS: pic_parameter_set_id" , s); + pps->seq_parameter_set_id = ue_v ("PPS: seq_parameter_set_id" , s); + pps->entropy_coding_mode_flag = u_1 ("PPS: entropy_coding_mode_flag" , s); + + //! Note: as per JVT-F078 the following bit is unconditional. If F078 is not accepted, then + //! one has to fetch the correct SPS to check whether the bit is present (hopefully there is + //! no consistency problem :-( + //! The current encoder code handles this in the same way. When you change this, don't forget + //! the encoder! StW, 12/8/02 + pps->bottom_field_pic_order_in_frame_present_flag = u_1 ("PPS: bottom_field_pic_order_in_frame_present_flag" , s); + + pps->num_slice_groups_minus1 = ue_v ("PPS: num_slice_groups_minus1" , s); + + // FMO stuff begins here + if (pps->num_slice_groups_minus1 > 0) + { + pps->slice_group_map_type = ue_v ("PPS: slice_group_map_type" , s); + if (pps->slice_group_map_type == 0) + { + for (i=0; i<=pps->num_slice_groups_minus1; i++) + pps->run_length_minus1 [i] = ue_v ("PPS: run_length_minus1 [i]" , s); + } + else if (pps->slice_group_map_type == 2) + { + for (i=0; i<pps->num_slice_groups_minus1; i++) + { + //! 
JVT-F078: avoid reference of SPS by using ue(v) instead of u(v) + pps->top_left [i] = ue_v ("PPS: top_left [i]" , s); + pps->bottom_right [i] = ue_v ("PPS: bottom_right [i]" , s); + } + } + else if (pps->slice_group_map_type == 3 || + pps->slice_group_map_type == 4 || + pps->slice_group_map_type == 5) + { + pps->slice_group_change_direction_flag = u_1 ("PPS: slice_group_change_direction_flag" , s); + pps->slice_group_change_rate_minus1 = ue_v ("PPS: slice_group_change_rate_minus1" , s); + } + else if (pps->slice_group_map_type == 6) + { + if (pps->num_slice_groups_minus1+1 >4) + NumberBitsPerSliceGroupId = 3; + else if (pps->num_slice_groups_minus1+1 > 2) + NumberBitsPerSliceGroupId = 2; + else + NumberBitsPerSliceGroupId = 1; + pps->pic_size_in_map_units_minus1 = ue_v ("PPS: pic_size_in_map_units_minus1" , s); + if ((pps->slice_group_id = calloc (pps->pic_size_in_map_units_minus1+1, 1)) == NULL) + no_mem_exit ("InterpretPPS: slice_group_id"); + for (i=0; i<=pps->pic_size_in_map_units_minus1; i++) + pps->slice_group_id[i] = (byte) u_v (NumberBitsPerSliceGroupId, "slice_group_id[i]", s); + } + } + + // End of FMO stuff + + pps->num_ref_idx_l0_active_minus1 = ue_v ("PPS: num_ref_idx_l0_active_minus1" , s); + pps->num_ref_idx_l1_active_minus1 = ue_v ("PPS: num_ref_idx_l1_active_minus1" , s); + pps->weighted_pred_flag = u_1 ("PPS: weighted_pred_flag" , s); + pps->weighted_bipred_idc = u_v ( 2, "PPS: weighted_bipred_idc" , s); + pps->pic_init_qp_minus26 = se_v ("PPS: pic_init_qp_minus26" , s); + pps->pic_init_qs_minus26 = se_v ("PPS: pic_init_qs_minus26" , s); + + pps->chroma_qp_index_offset = se_v ("PPS: chroma_qp_index_offset" , s); + + pps->deblocking_filter_control_present_flag = u_1 ("PPS: deblocking_filter_control_present_flag" , s); + pps->constrained_intra_pred_flag = u_1 ("PPS: constrained_intra_pred_flag" , s); + pps->redundant_pic_cnt_present_flag = u_1 ("PPS: redundant_pic_cnt_present_flag" , s); + + if(more_rbsp_data(s->streamBuffer, 
s->frame_bitoffset,s->bitstream_length)) // more_data_in_rbsp() + { + //Fidelity Range Extensions Stuff + pps->transform_8x8_mode_flag = u_1 ("PPS: transform_8x8_mode_flag" , s); + pps->pic_scaling_matrix_present_flag = u_1 ("PPS: pic_scaling_matrix_present_flag" , s); + + if(pps->pic_scaling_matrix_present_flag) + { + chroma_format_idc = p_Vid->SeqParSet[pps->seq_parameter_set_id].chroma_format_idc; + n_ScalingList = 6 + ((chroma_format_idc != YUV444) ? 2 : 6) * pps->transform_8x8_mode_flag; + for(i=0; i<n_ScalingList; i++) + { + pps->pic_scaling_list_present_flag[i]= u_1 ("PPS: pic_scaling_list_present_flag" , s); + + if(pps->pic_scaling_list_present_flag[i]) + { + if(i<6) + Scaling_List(pps->ScalingList4x4[i], 16, &pps->UseDefaultScalingMatrix4x4Flag[i], s); + else + Scaling_List(pps->ScalingList8x8[i-6], 64, &pps->UseDefaultScalingMatrix8x8Flag[i-6], s); + } + } + } + pps->second_chroma_qp_index_offset = se_v ("PPS: second_chroma_qp_index_offset" , s); + } + else + { + pps->second_chroma_qp_index_offset = pps->chroma_qp_index_offset; + } + + pps->Valid = TRUE; +} + + +void PPSConsistencyCheck (pic_parameter_set_rbsp_t *pps) +{ + printf ("Consistency checking a picture parset, to be implemented\n"); +// if (pps->seq_parameter_set_id invalid then do something) +} + +void SPSConsistencyCheck (seq_parameter_set_rbsp_t *sps) +{ + printf ("Consistency checking a sequence parset, to be implemented\n"); +} + +void MakePPSavailable (VideoParameters *p_Vid, int id, pic_parameter_set_rbsp_t *pps) +{ + assert (pps->Valid == TRUE); + + if (p_Vid->PicParSet[id].Valid == TRUE && p_Vid->PicParSet[id].slice_group_id != NULL) + free (p_Vid->PicParSet[id].slice_group_id); + + memcpy (&p_Vid->PicParSet[id], pps, sizeof (pic_parameter_set_rbsp_t)); + + // we can simply use the memory provided with the pps. 
the PPS is destroyed after this function + // call and will not try to free if pps->slice_group_id == NULL + p_Vid->PicParSet[id].slice_group_id = pps->slice_group_id; + pps->slice_group_id = NULL; +} + +void CleanUpPPS(VideoParameters *p_Vid) +{ + int i; + + for (i=0; i<MAXPPS; i++) + { + if (p_Vid->PicParSet[i].Valid == TRUE && p_Vid->PicParSet[i].slice_group_id != NULL) + free (p_Vid->PicParSet[i].slice_group_id); + + p_Vid->PicParSet[i].Valid = FALSE; + } +} + + +void MakeSPSavailable (VideoParameters *p_Vid, int id, seq_parameter_set_rbsp_t *sps) +{ + assert (sps->Valid == TRUE); + memcpy (&p_Vid->SeqParSet[id], sps, sizeof (seq_parameter_set_rbsp_t)); +} + +void ProcessSPS_Memory(VideoParameters *p_Vid, const void *buffer, size_t bufferlen) +{ + +} + +void ProcessSPS(VideoParameters *p_Vid, NALU_t *nalu) +{ + DataPartition *dp = AllocPartition(1); + seq_parameter_set_rbsp_t *sps = AllocSPS(); + + //memcpy (dp->bitstream->streamBuffer, buffer, bufferlen); + dp->bitstream->streamBuffer = &nalu->buf[1]; + dp->bitstream->code_len = dp->bitstream->bitstream_length = RBSPtoSODB (dp->bitstream->streamBuffer, nalu->len-1); + dp->bitstream->read_len = dp->bitstream->frame_bitoffset = 0; + InterpretSPS (p_Vid, dp, sps); + + if (sps->Valid) + { + if (p_Vid->active_sps) + { + if (sps->seq_parameter_set_id == p_Vid->active_sps->seq_parameter_set_id) + { + if (!sps_is_equal(sps, p_Vid->active_sps)) + { + if (p_Vid->dec_picture) + { + // this may only happen on slice loss + exit_picture(p_Vid, &p_Vid->dec_picture); + } + p_Vid->active_sps=NULL; + } + } + } + // SPSConsistencyCheck (pps); + MakeSPSavailable (p_Vid, sps->seq_parameter_set_id, sps); + p_Vid->profile_idc = sps->profile_idc; + p_Vid->separate_colour_plane_flag = sps->separate_colour_plane_flag; + if( p_Vid->separate_colour_plane_flag ) + { + p_Vid->ChromaArrayType = 0; + } + else + { + p_Vid->ChromaArrayType = sps->chroma_format_idc; + } + } + + FreePartition (dp, 1); + FreeSPS (sps); + +} + +void ProcessPPS 
(VideoParameters *p_Vid, NALU_t *nalu) +{ + DataPartition *dp = AllocPartition(1); + pic_parameter_set_rbsp_t *pps = AllocPPS(); + + //memcpy (dp->bitstream->streamBuffer, &nalu->buf[1], nalu->len-1); + dp->bitstream->streamBuffer = &nalu->buf[1]; + dp->bitstream->code_len = dp->bitstream->bitstream_length = RBSPtoSODB (dp->bitstream->streamBuffer, nalu->len-1); + dp->bitstream->read_len = dp->bitstream->frame_bitoffset = 0; + InterpretPPS (p_Vid, dp, pps); + // PPSConsistencyCheck (pps); + if (p_Vid->active_pps) + { + if (pps->pic_parameter_set_id == p_Vid->active_pps->pic_parameter_set_id) + { + if (!pps_is_equal(pps, p_Vid->active_pps)) + { + if (p_Vid->dec_picture) + { + // this may only happen on slice loss + exit_picture(p_Vid, &p_Vid->dec_picture); + } + p_Vid->active_pps = NULL; + } + } + } + MakePPSavailable (p_Vid, pps->pic_parameter_set_id, pps); + FreePartition (dp, 1); + FreePPS (pps); +} + +/*! + ************************************************************************ + * \brief + * Updates images max values + * + ************************************************************************ + */ +static void updateMaxValue(FrameFormat *format) +{ + format->max_value[0] = (1 << format->bit_depth[0]) - 1; + format->max_value_sq[0] = format->max_value[0] * format->max_value[0]; + format->max_value[1] = (1 << format->bit_depth[1]) - 1; + format->max_value_sq[1] = format->max_value[1] * format->max_value[1]; + format->max_value[2] = (1 << format->bit_depth[2]) - 1; + format->max_value_sq[2] = format->max_value[2] * format->max_value[2]; +} + +/*! 
+ ************************************************************************ + * \brief + * Reset format information + * + ************************************************************************ + */ +static void reset_format_info(seq_parameter_set_rbsp_t *sps, VideoParameters *p_Vid, FrameFormat *output) +{ + InputParameters *p_Inp = p_Vid->p_Inp; + static const int SubWidthC [4]= { 1, 2, 2, 1}; + static const int SubHeightC [4]= { 1, 2, 1, 1}; + + int crop_left, crop_right; + int crop_top, crop_bottom; + + // cropping for luma + if (sps->frame_cropping_flag) + { + crop_left = SubWidthC [sps->chroma_format_idc] * sps->frame_cropping_rect_left_offset; + crop_right = SubWidthC [sps->chroma_format_idc] * sps->frame_cropping_rect_right_offset; + crop_top = SubHeightC[sps->chroma_format_idc] * ( 2 - sps->frame_mbs_only_flag ) * sps->frame_cropping_rect_top_offset; + crop_bottom = SubHeightC[sps->chroma_format_idc] * ( 2 - sps->frame_mbs_only_flag ) * sps->frame_cropping_rect_bottom_offset; + } + else + { + crop_left = crop_right = crop_top = crop_bottom = 0; + } + + output->width_crop = p_Vid->width - crop_left - crop_right; + output->height_crop = p_Vid->height - crop_top - crop_bottom; + + output->width = p_Vid->width; + output->height = p_Vid->height; + + output->width_cr = p_Vid->width_cr; + output->height_cr = p_Vid->height_cr; + + // output size (excluding padding) + output->size_cmp[0] = output->width * output->height; + output->size_cmp[1] = output->width_cr * output->height_cr; + output->size_cmp[2] = output->size_cmp[1]; + output->size = output->size_cmp[0] + output->size_cmp[1] + output->size_cmp[2]; + output->mb_width = output->width / MB_BLOCK_SIZE; + output->mb_height = output->height / MB_BLOCK_SIZE; + + output->bit_depth[0] = p_Vid->bitdepth_luma; + output->bit_depth[1] = p_Vid->bitdepth_chroma; + output->bit_depth[2] = p_Vid->bitdepth_chroma; + + output->yuv_format = (ColorFormat) sps->chroma_format_idc; + + updateMaxValue(output); +} + +/*! 
+ ************************************************************************ + * \brief + * Activate Sequence Parameter Sets + * + ************************************************************************ + */ +void activate_sps (VideoParameters *p_Vid, seq_parameter_set_rbsp_t *sps) +{ + InputParameters *p_Inp = p_Vid->p_Inp; + + if (p_Vid->active_sps != sps) + { + if (p_Vid->dec_picture) + { + // this may only happen on slice loss + exit_picture(p_Vid, &p_Vid->dec_picture); + } + p_Vid->active_sps = sps; + + p_Vid->bitdepth_chroma = 0; + p_Vid->width_cr = 0; + p_Vid->height_cr = 0; + + // maximum vertical motion vector range in luma quarter pixel units + if (p_Vid->active_sps->level_idc <= 10) + { + p_Vid->max_vmv_r = 64 * 4; + } + else if (p_Vid->active_sps->level_idc <= 20) + { + p_Vid->max_vmv_r = 128 * 4; + } + else if (p_Vid->active_sps->level_idc <= 30) + { + p_Vid->max_vmv_r = 256 * 4; + } + else + { + p_Vid->max_vmv_r = 512 * 4; // 512 pixels in quarter pixels + } + + // Fidelity Range Extensions stuff (part 1) + p_Vid->bitdepth_luma = (short) (sps->bit_depth_luma_minus8 + 8); + p_Vid->bitdepth_scale[0] = 1 << sps->bit_depth_luma_minus8; + if (sps->chroma_format_idc != YUV400) + { + p_Vid->bitdepth_chroma = (short) (sps->bit_depth_chroma_minus8 + 8); + p_Vid->bitdepth_scale[1] = 1 << sps->bit_depth_chroma_minus8; + } + + p_Vid->MaxFrameNum = 1<<(sps->log2_max_frame_num_minus4+4); + p_Vid->PicWidthInMbs = (sps->pic_width_in_mbs_minus1 +1); + p_Vid->PicHeightInMapUnits = (sps->pic_height_in_map_units_minus1 +1); + p_Vid->FrameHeightInMbs = ( 2 - sps->frame_mbs_only_flag ) * p_Vid->PicHeightInMapUnits; + p_Vid->FrameSizeInMbs = p_Vid->PicWidthInMbs * p_Vid->FrameHeightInMbs; + + p_Vid->yuv_format=sps->chroma_format_idc; + + p_Vid->width = p_Vid->PicWidthInMbs * MB_BLOCK_SIZE; + p_Vid->height = p_Vid->FrameHeightInMbs * MB_BLOCK_SIZE; + + if (sps->chroma_format_idc == YUV420) + { + p_Vid->width_cr = (p_Vid->width >> 1); + p_Vid->height_cr = (p_Vid->height >> 1); 
+ } + else if (sps->chroma_format_idc == YUV422) + { + p_Vid->width_cr = (p_Vid->width >> 1); + p_Vid->height_cr = p_Vid->height; + } + else if (sps->chroma_format_idc == YUV444) + { + //YUV444 + p_Vid->width_cr = p_Vid->width; + p_Vid->height_cr = p_Vid->height; + } + + init_frext(p_Vid); + init_global_buffers(p_Vid); + + if (!p_Vid->no_output_of_prior_pics_flag) + { + flush_dpb(p_Vid); + } + init_dpb(p_Vid); + + ercInit(p_Vid, p_Vid->width, p_Vid->height, 1); + image_cache_set_dimensions(&p_Vid->image_cache[0], p_Vid->width, p_Vid->height); + image_cache_set_dimensions(&p_Vid->image_cache[1], p_Vid->width_cr, p_Vid->height_cr); + motion_cache_set_dimensions(&p_Vid->motion_cache, p_Vid->width / BLOCK_SIZE, p_Vid->height / BLOCK_SIZE); + + } + + reset_format_info(sps, p_Vid, &p_Inp->output); + +} + +void activate_pps(VideoParameters *p_Vid, pic_parameter_set_rbsp_t *pps) +{ + if (p_Vid->active_pps != pps) + { + if (p_Vid->dec_picture) + { + // this may only happen on slice loss + exit_picture(p_Vid, &p_Vid->dec_picture); + } + + p_Vid->active_pps = pps; + + // Fidelity Range Extensions stuff (part 2) + p_Vid->Transform8x8Mode = pps->transform_8x8_mode_flag; + + } +} + +void UseParameterSet (Slice *currSlice, int PicParsetId) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + seq_parameter_set_rbsp_t *sps = &p_Vid->SeqParSet[p_Vid->PicParSet[PicParsetId].seq_parameter_set_id]; + pic_parameter_set_rbsp_t *pps = &p_Vid->PicParSet[PicParsetId]; + + if (p_Vid->PicParSet[PicParsetId].Valid != TRUE) + printf ("Trying to use an invalid (uninitialized) Picture Parameter Set with ID %d, expect the unexpected...\n", PicParsetId); + if (p_Vid->SeqParSet[p_Vid->PicParSet[PicParsetId].seq_parameter_set_id].Valid != TRUE) + printf ("PicParset %d references an invalid (uninitialized) Sequence Parameter Set with ID %d, expect the unexpected...\n", PicParsetId, (int) p_Vid->PicParSet[PicParsetId].seq_parameter_set_id); + + sps = 
&p_Vid->SeqParSet[p_Vid->PicParSet[PicParsetId].seq_parameter_set_id]; + + + // In theory, and with a well-designed software, the lines above + // are everything necessary. In practice, we need to patch many values + // in p_Vid-> (but no more in p_Inp-> -- these have been taken care of) + + // Sequence Parameter Set Stuff first + +// printf ("Using Picture Parameter set %d and associated Sequence Parameter Set %d\n", PicParsetId, p_Vid->PicParSet[PicParsetId].seq_parameter_set_id); + + if ((int) sps->pic_order_cnt_type < 0 || sps->pic_order_cnt_type > 2) // != 1 + { + printf ("invalid sps->pic_order_cnt_type = %d\n", (int) sps->pic_order_cnt_type); + error ("pic_order_cnt_type != 1", -1000); + } + + if (sps->pic_order_cnt_type == 1) + { + if(sps->num_ref_frames_in_pic_order_cnt_cycle >= MAXnum_ref_frames_in_pic_order_cnt_cycle) + { + error("num_ref_frames_in_pic_order_cnt_cycle too large",-1011); + } + } + + activate_sps(p_Vid, sps); + activate_pps(p_Vid, pps); + + // currSlice->dp_mode is set by read_new_slice (NALU first byte available there) + if (pps->entropy_coding_mode_flag == CAVLC) + { + currSlice->nal_startcode_follows = uvlc_startcode_follows; + } + else + { + currSlice->nal_startcode_follows = cabac_startcode_follows; + } +} + diff --git a/Src/h264dec/ldecod/src/prediction.asm b/Src/h264dec/ldecod/src/prediction.asm new file mode 100644 index 00000000..33d3d499 --- /dev/null +++ b/Src/h264dec/ldecod/src/prediction.asm @@ -0,0 +1,1626 @@ +.686 +.XMM +.model FLAT + +copy_image_data_16x16_stride@OptimizedFunctions = 32 +dec_picture@VideoParameters = 698192 +p_Slice@MacroBlock = 0 +plane_images@StorablePicture = 158512 +mb_rec@Slice = 1696 +mb_pred@Slice = 928 +cof@Slice = 2464 + +CONST SEGMENT +align 16 +const32 DW 020H, 020H, 020H, 020H, 020H, 020H, 020H, 020H +CONST ENDS + +; +; +; +; + +PUBLIC _weighted_bi_prediction4x4 +_TEXT SEGMENT +mb_pred = 4 +block_l0 = 8 +wp_scale_l0 = 12 +wp_scale_l1 = 16 +wp_offset = 20 +weight_denom = 24 
; ---------------------------------------------------------------------------
; _weighted_bi_prediction4x4
;
; Stack arguments (offsets from esp on entry, see the equates above):
;   mb_pred, block_l0, wp_scale_l0, wp_scale_l1, wp_offset, weight_denom
;
; Processes four rows of four 8-bit pixels, rows 16 bytes apart in both
; buffers. For every pixel:
;   weight_denom > 0 :
;     ((block_l0*wp_scale_l0 + mb_pred*wp_scale_l1 + (1 << (denom-1))) >> denom)
;       + wp_offset
;   weight_denom <= 0:
;     ((block_l0*wp_scale_l0 + mb_pred*wp_scale_l1) << -denom) + wp_offset
; The result is saturated to unsigned 8 bit and written back to mb_pred.
; All arithmetic is done in 16-bit lanes.
;
; Fix: the third row was stored to 0[eax] in both paths, overwriting row 0
; and leaving row 2 unweighted; it is now stored to 32[eax].
; ---------------------------------------------------------------------------
_weighted_bi_prediction4x4 PROC ; COMDAT
    mov         eax, DWORD PTR weight_denom[esp]
    pxor        mm0, mm0                            ; mm0 = 0, used to widen bytes to words
    pshufw      mm1, MMWORD PTR wp_scale_l0[esp], 0 ; mm1 = wp_scale_l0 in all four words
    test        eax, eax
    pshufw      mm2, MMWORD PTR wp_scale_l1[esp], 0 ; mm2 = wp_scale_l1 in all four words
    pshufw      mm3, MMWORD PTR wp_offset[esp], 0   ; mm3 = wp_offset in all four words
    jle         BI_PRED4x4@LEFT_SHIFT

    movd        mm4, eax                            ; mm4 = shift count (weight_denom)
    lea         ecx, DWORD PTR [eax-1]              ;
    mov         edx, 1
    shl         edx, cl                             ; edx = 1 << (weight_denom - 1)
    movd        mm5, edx
    mov         eax, mb_pred[esp]
    mov         edx, block_l0[esp]
    pshufw      mm5, mm5, 0                         ; mm5 = rounding term in all four words

    ; row 0
    movd        mm6, DWORD PTR 0[edx]               ; block_l0
    movd        mm7, DWORD PTR 0[eax]               ; mb_pred
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    movd        mm7, DWORD PTR 16[eax]              ; mb_pred (next row, loaded early)
    paddw       mm6, mm5
    psraw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 0[eax], mm6

    ; row 1
    movd        mm6, DWORD PTR 16[edx]              ; block_l0
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    movd        mm7, DWORD PTR 32[eax]              ; mb_pred (next row, loaded early)
    paddw       mm6, mm5
    psraw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 16[eax], mm6

    ; row 2
    movd        mm6, DWORD PTR 32[edx]              ; block_l0
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    movd        mm7, DWORD PTR 48[eax]              ; mb_pred (next row, loaded early)
    paddw       mm6, mm5
    psraw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 32[eax], mm6              ; was 0[eax]

    ; row 3
    movd        mm6, DWORD PTR 48[edx]              ; block_l0
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    paddw       mm6, mm5
    psraw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 48[eax], mm6
    ret         0

BI_PRED4x4@LEFT_SHIFT:
    neg         eax
    movd        mm4, eax                            ; mm4 = left shift count (-weight_denom)
    mov         eax, mb_pred[esp]
    mov         edx, block_l0[esp]

    ; row 0
    movd        mm6, DWORD PTR 0[edx]               ; block_l0
    movd        mm7, DWORD PTR 0[eax]               ; mb_pred
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    movd        mm7, DWORD PTR 16[eax]              ; mb_pred (next row, loaded early)
    psllw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 0[eax], mm6

    ; row 1
    movd        mm6, DWORD PTR 16[edx]              ; block_l0
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    movd        mm7, DWORD PTR 32[eax]              ; mb_pred (next row, loaded early)
    psllw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 16[eax], mm6

    ; row 2
    movd        mm6, DWORD PTR 32[edx]              ; block_l0
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    movd        mm7, DWORD PTR 48[eax]              ; mb_pred (next row, loaded early)
    psllw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 32[eax], mm6              ; was 0[eax]

    ; row 3
    movd        mm6, DWORD PTR 48[edx]              ; block_l0
    punpcklbw   mm6, mm0
    punpcklbw   mm7, mm0
    pmullw      mm6, mm1
    pmullw      mm7, mm2
    paddw       mm6, mm7
    psllw       mm6, mm4
    paddw       mm6, mm3
    packuswb    mm6, mm6
    movd        DWORD PTR 48[eax], mm6
    ret         0
_weighted_bi_prediction4x4 ENDP
_TEXT ENDS

PUBLIC _itrans4x4_mmx
_TEXT SEGMENT
_tblock$ = 4 ; size = 4
_mb_pred$ = 8 ; size = 4
_mb_rec$ = 12 ; size = 4
_pos_x$ = 16 ; size = 4
_pos_y$ = 20 ; size = 4
_itrans4x4_mmx PROC ; COMDAT

    mov         edx, DWORD PTR _pos_y$[esp]
    shl         edx, 4
    add         edx, DWORD PTR _pos_x$[esp]
    mov         eax, DWORD PTR _tblock$[esp]
    mov         ecx, DWORD PTR _mb_pred$[esp]
    add         ecx, edx
    add         edx, DWORD PTR _mb_rec$[esp]
_itrans4x4_mmx_direct PROC ; COMDAT
    ; load 4x4 matrix
    movq        mm0, MMWORD PTR 0[eax]
    movq        mm1, MMWORD PTR 8[eax]
    movq        mm2, MMWORD PTR 16[eax]
    movq        mm3, MMWORD PTR 24[eax]

    ; rotate 4x4 matrix
    movq        mm4, mm0 ; p0 = mm4 (copy)
    punpcklwd   mm0, mm2 ; r0 = mm0
    punpckhwd   mm4, mm2 ; r2 = mm4
    movq        mm5, mm1 ; p1 = mm5 (copy)
    punpcklwd   mm1, mm3 ; r1 = mm1
    punpckhwd   mm5, mm3 ; r3 = mm5
    movq        mm6, mm0 ; r0 = mm6 (copy)
    punpcklwd   mm0, mm1 ; t0 = mm0
    punpckhwd   mm6, mm1 ; t1 = mm6
    movq        mm1, mm4 ; r2 = mm1 (copy)
    punpcklwd   mm1, mm5 ; t2 = mm1
    punpckhwd   mm4, mm5 ; t3 = mm4

    movq        mm2, mm0 ; mm2 = t0 (copy)
    paddw       mm0, mm1 ; mm0 = p0
    psubw       mm2, mm1 ; mm2 = p1, mm1 available
    movq        mm5, mm6 ; mm5 = t1 (copy)
    psraw       mm5, 1 ; mm5 = (t1 >> 1)
    psubw       mm5, mm4 ; mm5 = p2
    psraw       mm4, 1 ; mm4 = (t3 >> 1)
    paddw       mm6,
mm4 ; mm6 = p3 + + movq mm3, mm0 ; mm3 = p0 (copy) + paddw mm0, mm6 ; mm0 = r0 + movq mm1, mm2 ; mm1 = p1 (copy) + paddw mm1, mm5 ; mm1 = r1 + psubw mm2, mm5 ; mm2 = r2, mm5 available + psubw mm3, mm6 ; mm3 = r3 + + ; rotate 4x4 matrix to set up for vertical + movq mm4, mm0 ; r0 = mm4 (copy) + punpcklwd mm0, mm2 ; p0 = mm0 + punpckhwd mm4, mm2 ; p2 = mm4 + movq mm5, mm1 ; r1 = mm5 (copy) + punpcklwd mm1, mm3 ; p1 = mm1 + punpckhwd mm5, mm3 ; p3 = mm5 + movq mm6, mm0 ; p0 = mm6 (copy) + punpcklwd mm0, mm1 ; t0 = mm0 + punpckhwd mm6, mm1 ; t1 = mm6 + movq mm1, mm4 ; p2 = mm1 (copy) + punpcklwd mm1, mm5 ; t2 = mm1 + punpckhwd mm4, mm5 ; t3 = mm4 + + movq mm2, mm0 ; mm2 = t0 (copy) + paddw mm0, mm1 ; mm0 = p0 + psubw mm2, mm1 ; mm2 = p1, mm1 available + movq mm5, mm6 ; mm5 = t1 (copy) + psraw mm5, 1 ; mm5 = (t1 >> 1) + psubw mm5, mm4 ; mm5 = p2 + psraw mm4, 1 ; mm4 = (t3 >> 1) + paddw mm6, mm4 ; mm6 = p3 + movq mm3, mm0 ; mm3 = p0 (copy) + paddw mm0, mm6 ; mm0 = r0 + movq mm1, mm2 ; mm1 = p1 (copy) + paddw mm1, mm5 ; mm1 = r1 + psubw mm2, mm5 ; mm2 = r2, mm5 available + psubw mm3, mm6 ; mm3 = r3 + + +; --- 4x4 iDCT done, now time to combine with mpr --- + + movq mm7, MMWORD PTR const32 + + paddw mm0, mm7 ; rres + 32 + psraw mm0, 6 ; (rres + 32) >> 6 + paddw mm1, mm7 ; rres + 32 + psraw mm1, 6 ; (rres + 32) >> 6 + paddw mm2, mm7 ; rres + 32 + psraw mm2, 6 ; (rres + 32) >> 6 + paddw mm3, mm7 ; rres + 32 + psraw mm3, 6 ; (rres + 32) >> 6 + + pxor mm7, mm7 + + ; convert mpr from unsigned char to short + movd mm4, DWORD PTR 0[ecx] + movd mm5, DWORD PTR 16[ecx] + movd mm6, DWORD PTR 32[ecx] + punpcklbw mm4, mm7 + punpcklbw mm5, mm7 + punpcklbw mm6, mm7 + paddsw mm4, mm0 ; pred_row + rres_row + movd mm0, DWORD PTR 48[ecx] ; reuse mm0 for mpr[3] + paddsw mm5, mm1 ; pred_row + rres_row + punpcklbw mm0, mm7 + paddsw mm6, mm2 ; pred_row + rres_row + paddsw mm0, mm3 ; pred_row + rres_row + ; results in mm4, mm5, mm6, mm0 + + ; move back to 8 bit + packuswb mm4, mm7 + packuswb mm5, 
mm7 + packuswb mm6, mm7 + packuswb mm0, mm7 + movd DWORD PTR 0[edx], mm4 + movd DWORD PTR 16[edx], mm5 + movd DWORD PTR 32[edx], mm6 + movd DWORD PTR 48[edx], mm0 + ret 0 + +_itrans4x4_mmx_direct ENDP +_itrans4x4_mmx ENDP +_TEXT ENDS + +EXTRN _itrans_sp:PROC +EXTRN _Inv_Residual_trans_4x4:PROC +PUBLIC _iMBtrans4x4 +EXTRN _opt:BYTE +_TEXT SEGMENT +_currSlice$ = -4 ; size = 4 +_mb_rec$166704 = 8 ; size = 4 +_currMB$ = 8 ; size = 4 +_curr_img$ = 12 ; size = 4 +_pl$ = 8 ; second parameter +_smb$ = 16 ; size = 4 +_iMBtrans4x4 PROC + push ecx + push ebx + push ebp + push esi +STACKOFFSET = 16 +; 408 : VideoImage *curr_img = pl ? dec_picture->imgUV[pl - 1]: dec_picture->imgY; + + mov esi, DWORD PTR _pl$[esp+STACKOFFSET] + push edi +STACKOFFSET = STACKOFFSET + 4 + mov edi, DWORD PTR _currMB$[esp+16] + mov ebp, DWORD PTR [edi+p_Slice@MacroBlock] ; ebp: currMB->p_Slice + mov eax, DWORD PTR [edi+4] + mov eax, DWORD PTR [eax+dec_picture@VideoParameters] ; eax: p_Vid->dec_picture; + mov DWORD PTR _currSlice$[esp+20], ebp + mov ecx, DWORD PTR [eax+esi*4+plane_images@StorablePicture] + mov DWORD PTR _curr_img$[esp+16], ecx + + cmp DWORD PTR _smb$[esp+16], 0 ; if (smb) +; 413 : { +; 414 : h264_short_block_t *blocks = currSlice->cof4[pl]; +; 415 : const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl]; +; 416 : +; 417 : itrans_sp(blocks[0], mb_pred, currMB, pl, 0, 0); + + je $LN4@iMBtrans4x + push 0 + push 0 + mov eax, esi + shl eax, 9 + lea ebx, DWORD PTR [eax+ebp+cof@Slice] + mov ecx, esi + shl ecx, 8 + lea ebp, DWORD PTR [ecx+ebp+mb_pred@Slice] + push esi + push ebp + push ebx + mov eax, edi + call _itrans_sp + +; 418 : itrans_sp(blocks[1], mb_pred, currMB, pl, 4, 0); + + push 0 + push 4 + push esi + lea edx, DWORD PTR [ebx+32] + push ebp + push edx + mov eax, edi + call _itrans_sp + +; 419 : itrans_sp(blocks[2], mb_pred, currMB, pl, 0, 4); + + push 4 + push 0 + push esi + lea eax, DWORD PTR [ebx+64] + push ebp + push eax + mov eax, edi + call _itrans_sp + +; 420 : 
itrans_sp(blocks[3], mb_pred, currMB, pl, 4, 4); + + push 4 + push 4 + push esi + lea ecx, DWORD PTR [ebx+96] + push ebp + push ecx + mov eax, edi + call _itrans_sp + add esp, 80 ; 00000050H + +; 421 : itrans_sp(blocks[4], mb_pred, currMB, pl, 8, 0); + + push 0 + push 8 + push esi + lea edx, DWORD PTR [ebx+128] + push ebp + push edx + mov eax, edi + call _itrans_sp + +; 422 : itrans_sp(blocks[5], mb_pred, currMB, pl, 12, 0); + + push 0 + push 12 ; 0000000cH + push esi + lea eax, DWORD PTR [ebx+160] + push ebp + push eax + mov eax, edi + call _itrans_sp + +; 423 : itrans_sp(blocks[6], mb_pred, currMB, pl, 8, 4); + + push 4 + push 8 + push esi + lea ecx, DWORD PTR [ebx+192] + push ebp + push ecx + mov eax, edi + call _itrans_sp + +; 424 : itrans_sp(blocks[7], mb_pred, currMB, pl, 12, 4); + + push 4 + push 12 ; 0000000cH + push esi + lea edx, DWORD PTR [ebx+224] + push ebp + push edx + mov eax, edi + call _itrans_sp + add esp, 80 ; 00000050H + +; 425 : itrans_sp(blocks[8], mb_pred, currMB, pl, 0, 8); + + push 8 + push 0 + push esi + lea eax, DWORD PTR [ebx+256] + push ebp + push eax + mov eax, edi + call _itrans_sp + +; 426 : itrans_sp(blocks[9], mb_pred, currMB, pl, 4, 8); + + push 8 + push 4 + push esi + push ebp + lea ecx, DWORD PTR [ebx+288] + push ecx + mov eax, edi + call _itrans_sp + +; 427 : itrans_sp(blocks[10], mb_pred, currMB, pl, 0, 12); + + push 12 ; 0000000cH + push 0 + push esi + lea edx, DWORD PTR [ebx+320] + push ebp + push edx + mov eax, edi + call _itrans_sp + +; 428 : itrans_sp(blocks[11], mb_pred, currMB, pl, 4, 12); + + push 12 ; 0000000cH + push 4 + push esi + lea eax, DWORD PTR [ebx+352] + push ebp + push eax + mov eax, edi + call _itrans_sp + add esp, 80 ; 00000050H + +; 429 : itrans_sp(blocks[12], mb_pred, currMB, pl, 8, 8); + + push 8 + push 8 + push esi + lea ecx, DWORD PTR [ebx+384] + push ebp + push ecx + mov eax, edi + call _itrans_sp + +; 430 : itrans_sp(blocks[13], mb_pred, currMB, pl, 12, 8); + + push 8 + push 12 ; 0000000cH + push 
esi + lea edx, DWORD PTR [ebx+416] + push ebp + push edx + mov eax, edi + call _itrans_sp + +; 431 : itrans_sp(blocks[14], mb_pred, currMB, pl, 8, 12); + + push 12 ; 0000000cH + push 8 + push esi + lea eax, DWORD PTR [ebx+448] + push ebp + push eax + mov eax, edi + call _itrans_sp + +; 432 : itrans_sp(blocks[15], mb_pred, currMB, pl, 12, 12); + + push 12 ; 0000000cH + push 12 ; 0000000cH + push esi + add ebx, 480 ; 000001e0H + push ebp + push ebx + mov eax, edi + call _itrans_sp + mov ebp, DWORD PTR _currSlice$[esp+100] + add esp, 80 ; 00000050H + jmp COPY_16x16 + +$LN4@iMBtrans4x: + +; 433 : } +; 434 : else if (currMB->is_lossless) + + cmp DWORD PTR [edi+84], 0 + je $LN2@iMBtrans4x + + push 0 + push 0 + +; 435 : { +; 436 : Inv_Residual_trans_4x4(currMB, pl, 0, 0); + + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 437 : Inv_Residual_trans_4x4(currMB, pl, 4, 0); + + push 0 + push 4 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 438 : Inv_Residual_trans_4x4(currMB, pl, 0, 4); + + push 4 + push 0 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 439 : Inv_Residual_trans_4x4(currMB, pl, 4, 4); + + push 4 + push 4 + push esi + push edi + call _Inv_Residual_trans_4x4 + add esp, 64 ; 00000040H + +; 440 : Inv_Residual_trans_4x4(currMB, pl, 8, 0); + + push 0 + push 8 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 441 : Inv_Residual_trans_4x4(currMB, pl, 12, 0); + + push 0 + push 12 ; 0000000cH + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 442 : Inv_Residual_trans_4x4(currMB, pl, 8, 4); + + push 4 + push 8 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 443 : Inv_Residual_trans_4x4(currMB, pl, 12, 4); + + push 4 + push 12 ; 0000000cH + push esi + push edi + call _Inv_Residual_trans_4x4 + add esp, 64 ; 00000040H + +; 444 : Inv_Residual_trans_4x4(currMB, pl, 0, 8); + + push 8 + push 0 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 445 : Inv_Residual_trans_4x4(currMB, pl, 4, 8); + + push 8 + push 4 + push 
esi + push edi + call _Inv_Residual_trans_4x4 + +; 446 : Inv_Residual_trans_4x4(currMB, pl, 0, 12); + + push 12 ; 0000000cH + push 0 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 447 : Inv_Residual_trans_4x4(currMB, pl, 4, 12); + + push 12 ; 0000000cH + push 4 + push esi + push edi + call _Inv_Residual_trans_4x4 + add esp, 64 ; 00000040H + +; 448 : Inv_Residual_trans_4x4(currMB, pl, 8, 8); + + push 8 + push 8 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 449 : Inv_Residual_trans_4x4(currMB, pl, 12, 8); + + push 8 + push 12 ; 0000000cH + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 450 : Inv_Residual_trans_4x4(currMB, pl, 8, 12); + + push 12 ; 0000000cH + push 8 + push esi + push edi + call _Inv_Residual_trans_4x4 + +; 451 : Inv_Residual_trans_4x4(currMB, pl, 12, 12); + + push 12 ; 0000000cH + push 12 ; 0000000cH + push esi + push edi + call _Inv_Residual_trans_4x4 + add esp, 64 ; 00000040H + +; 452 : } +; 453 : else + + jmp COPY_16x16 +$LN2@iMBtrans4x: + +; 454 : { +; 455 : const h264_short_block_t *blocks = currSlice->cof4[pl]; +; 456 : const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl]; + + mov edx, esi + mov ecx, esi + shl edx, 8 + shl ecx, 9 + lea eax, DWORD PTR [edx+ebp] + lea ebx, DWORD PTR [ecx+ebp+cof@Slice] + +; 457 : h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + + ; put things in registers that itrans4x4_mmx_direct wants + lea edx, [eax + mb_rec@Slice]; mb_rec + lea ecx, [eax + mb_pred@Slice] ; mb_pred + mov eax, ebx ; blocks + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+32] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+128] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[4], mb_pred, mb_rec, 8, 0); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+160] + call _itrans4x4_mmx_direct ; 
opt_itrans4x4(blocks[5], mb_pred, mb_rec, 12, 0); + + ; second row + lea edx, [edx+52] + lea ecx, [ecx+52] + lea eax, [ebx+64] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+96] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+192] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[6], mb_pred, mb_rec, 8, 4); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+224] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[7], mb_pred, mb_rec, 12, 4); + + ; third row + lea edx, [edx+52] + lea ecx, [ecx+52] + lea eax, [ebx+256] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+288] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+384] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[12], mb_pred, mb_rec, 8, 8); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+416] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[13], mb_pred, mb_rec, 12, 8); + + ; fourth row + lea edx, [edx+52] + lea ecx, [ecx+52] + lea eax, [ebx+320] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+352] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+448] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[14], mb_pred, mb_rec, 8, 12); + + lea edx, [edx+4] + lea ecx, [ecx+4] + lea eax, [ebx+480] + call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[15], mb_pred, mb_rec, 12, 12); +COPY_16x16: + +; construct picture from 4x4 blocks +; opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, currSlice->mb_rec[pl]); + + mov eax, DWORD PTR [edi+40] + mov ecx, 
DWORD PTR [edi+36] + shl esi, 8 + lea edx, DWORD PTR [esi+ebp+mb_rec@Slice] + push edx + mov edx, DWORD PTR _curr_img$[esp+20] + push eax + push ecx + push edx + call DWORD PTR _opt+copy_image_data_16x16_stride@OptimizedFunctions + add esp, 16 ; 00000010H + pop edi + pop esi + pop ebp + pop ebx + pop ecx + ret 0 +_iMBtrans4x4 ENDP +_TEXT ENDS + +_TEXT SEGMENT + ALIGN 2 + PUBLIC _itrans8x8_sse2 +_itrans8x8_sse2 PROC NEAR +; parameter 1(mb_rec): 8 + ebp +; parameter 2(mb_pred): 12 + ebp +; parameter 3(block): 16 + ebp +; parameter 4(pos_x): 20 + ebp + push ebp + mov ebp, esp + and esp, -16 + sub esp, 176 + mov edx, DWORD PTR [ebp+20] + mov ecx, DWORD PTR [ebp+8] ; ecx: mb_rec + add ecx, edx + add edx, DWORD PTR [ebp+12] ; edx: mb_pred + mov eax, DWORD PTR [ebp+16] ; eax: block + +;;; __m128i a0, a1, a2, a3; +;;; __m128i p0, p1, p2, p3, p4, p5 ,p6, p7; +;;; __m128i b0, b1, b2, b3, b4, b5, b6, b7; +;;; __m128i r0, r1, r2, r3, r4, r5, r6, r7; +;;; __m128i const32, zero; +;;; __declspec(align(32)) static const int16_t c32[8] = {32, 32, 32, 32, 32, 32, 32, 32}; +;;; __m128i pred0, pred1; +;;; +;;; const32 = _mm_load_si128((const __m128i *)c32); + + movdqa xmm0, XMMWORD PTR const32 + +;;; zero = _mm_setzero_si128(); +;;; +;;; // Horizontal +;;; b0 = _mm_load_si128((__m128i *)(block[0])); + + movdqa xmm4, XMMWORD PTR [eax] + +;;; b1 = _mm_load_si128((__m128i *)(block[1])); + + movdqa xmm7, XMMWORD PTR [eax+16] + +;;; b2 = _mm_load_si128((__m128i *)(block[2])); + + movdqa xmm5, XMMWORD PTR [eax+32] + +;;; b3 = _mm_load_si128((__m128i *)(block[3])); + + movdqa xmm3, XMMWORD PTR [eax+48] + +;;; b4 = _mm_load_si128((__m128i *)(block[4])); + + movdqa xmm6, XMMWORD PTR [eax+64] + +;;; b5 = _mm_load_si128((__m128i *)(block[5])); +;;; b6 = _mm_load_si128((__m128i *)(block[6])); + + movdqa xmm1, XMMWORD PTR [eax+96] + +;;; b7 = _mm_load_si128((__m128i *)(block[7])); + + movdqa xmm2, XMMWORD PTR [eax+112] + movdqa XMMWORD PTR [esp], xmm0 + movdqa xmm0, XMMWORD PTR [eax+80] + movdqa 
XMMWORD PTR [esp+16], xmm2 + +;;; +;;; /* rotate 8x8 (ugh) */ +;;; r0 = _mm_unpacklo_epi16(b0, b2); + + movdqa xmm2, xmm4 + punpcklwd xmm2, xmm5 + +;;; r1 = _mm_unpacklo_epi16(b1, b3); +;;; r2 = _mm_unpackhi_epi16(b0, b2); + + punpckhwd xmm4, xmm5 + +;;; r3 = _mm_unpackhi_epi16(b1, b3); +;;; r4 = _mm_unpacklo_epi16(b4, b6); +;;; r5 = _mm_unpacklo_epi16(b5, b7); + + movdqa xmm5, xmm0 + movdqa XMMWORD PTR [esp+32], xmm2 + movdqa xmm2, xmm7 + punpcklwd xmm2, xmm3 + punpckhwd xmm7, xmm3 + movdqa xmm3, xmm6 + punpcklwd xmm3, xmm1 + movdqa XMMWORD PTR [esp+48], xmm3 + movdqa xmm3, XMMWORD PTR [esp+16] + punpcklwd xmm5, xmm3 + +;;; r6 = _mm_unpackhi_epi16(b4, b6); + + punpckhwd xmm6, xmm1 +;;; r7 = _mm_unpackhi_epi16(b5, b7); + + punpckhwd xmm0, xmm3 + +;;; +;;; b0 = _mm_unpacklo_epi16(r0, r1); + + movdqa xmm3, XMMWORD PTR [esp+32] + movdqa xmm1, xmm3 + punpcklwd xmm1, xmm2 + +;;; b1 = _mm_unpackhi_epi16(r0, r1); + + punpckhwd xmm3, xmm2 + +;;; b2 = _mm_unpacklo_epi16(r2, r3); + + movdqa xmm2, xmm4 + punpcklwd xmm2, xmm7 + +;;; b3 = _mm_unpackhi_epi16(r2, r3); + + punpckhwd xmm4, xmm7 + movdqa XMMWORD PTR [esp+64], xmm4 + +;;; b4 = _mm_unpacklo_epi16(r4, r5); + + movdqa xmm4, XMMWORD PTR [esp+48] + movdqa xmm7, xmm4 + punpcklwd xmm7, xmm5 + +;;; b5 = _mm_unpackhi_epi16(r4, r5); + + punpckhwd xmm4, xmm5 + +;;; b6 = _mm_unpacklo_epi16(r6, r7); + + movdqa xmm5, xmm6 + punpcklwd xmm5, xmm0 + +;;; b7 = _mm_unpackhi_epi16(r6, r7); + + punpckhwd xmm6, xmm0 + +;;; +;;; p0 = _mm_unpacklo_epi64(b0, b4); + + movdqa xmm0, xmm1 + punpcklqdq xmm0, xmm7 + +;;; p1 = _mm_unpackhi_epi64(b0, b4); + + punpckhqdq xmm1, xmm7 + movdqa XMMWORD PTR [esp+16], xmm1 + +;;; p2 = _mm_unpacklo_epi64(b1, b5); + + movdqa xmm1, xmm3 + punpcklqdq xmm1, xmm4 + +;;; p3 = _mm_unpackhi_epi64(b1, b5); +;;; p4 = _mm_unpacklo_epi64(b2, b6); +;;; p5 = _mm_unpackhi_epi64(b2, b6); +;;; p6 = _mm_unpacklo_epi64(b3, b7); +;;; p7 = _mm_unpackhi_epi64(b3, b7); +;;; +;;; /* perform approx DCT */ +;;; a0 = 
_mm_add_epi16(p0, p4); // p0 + p4 +;;; a1 = _mm_sub_epi16(p0, p4); // p0 - p4 +;;; r0 = _mm_srai_epi16(p2, 1); // p2 >> 1 + + movdqa xmm7, xmm1 + psraw xmm7, 1 + punpckhqdq xmm3, xmm4 + movdqa XMMWORD PTR [esp+32], xmm3 + movdqa xmm3, xmm2 + punpcklqdq xmm3, xmm5 + punpckhqdq xmm2, xmm5 + movdqa xmm5, XMMWORD PTR [esp+64] + movdqa xmm4, xmm5 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm6, xmm0 + paddw xmm6, xmm3 + psubw xmm0, xmm3 + +;;; a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1) + + movdqa xmm3, xmm4 + +;;; r0 = _mm_srai_epi16(p6, 1); // p6 >> 1 + + psraw xmm4, 1 + psubw xmm3, xmm7 + +;;; a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1) + + paddw xmm1, xmm4 + +;;; +;;; b0 = _mm_add_epi16(a0, a3); // a0 + a3; + + movdqa xmm4, xmm6 + +;;; b2 = _mm_sub_epi16(a1, a2); // a1 - a2; + + movdqa xmm7, xmm0 + paddw xmm4, xmm1 + psubw xmm7, xmm3 + movdqa XMMWORD PTR [esp+48], xmm7 + +;;; b4 = _mm_add_epi16(a1, a2); // a1 + a2; + + paddw xmm0, xmm3 + movdqa XMMWORD PTR [esp+80], xmm0 + +;;; b6 = _mm_sub_epi16(a0, a3); // a0 - a3; +;;; +;;; //-p3 + p5 - p7 - (p7 >> 1); +;;; r0 = _mm_srai_epi16(p7, 1); // p7 >> 1 +;;; a0 = _mm_sub_epi16(p5, p3); // p5 - p3 + + movdqa xmm0, XMMWORD PTR [esp+32] + psubw xmm6, xmm1 + movdqa xmm1, xmm5 + psraw xmm1, 1 + movdqa xmm3, xmm2 + +;;; a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7 +;;; a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1) +;;; +;;; //p1 + p7 - p3 - (p3 >> 1); +;;; r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1) + + movdqa xmm7, xmm0 + movdqa XMMWORD PTR [esp+96], xmm6 + +;;; a1 = _mm_add_epi16(p1, p7); // p1 + p7 + + movdqa xmm6, XMMWORD PTR [esp+16] + psubw xmm3, xmm0 + psubw xmm3, xmm5 + psraw xmm7, 1 + psubw xmm3, xmm1 + movdqa xmm1, xmm6 + paddw xmm1, xmm5 + +;;; a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3 + + psubw xmm1, xmm0 + +;;; a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1) + + psubw xmm1, xmm7 + +;;; +;;; // -p1 + p7 + p5 + (p5 >> 1); +;;; r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1) + + 
movdqa xmm7, xmm2 + psraw xmm7, 1 + +;;; a2 = _mm_sub_epi16(p7, p1); // p7 - p1 + + psubw xmm5, xmm6 + +;;; a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5 + + paddw xmm5, xmm2 + +;;; a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1) + + paddw xmm5, xmm7 + +;;; +;;; // p3 + p5 + p1 + (p1 >> 1); +;;; a3 = _mm_add_epi16(p3, p5); // p3+p5 + + paddw xmm0, xmm2 + +;;; a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1 +;;; p1 = _mm_srai_epi16(p1, 1); // p1 >> 1 +;;; a3 = _mm_add_epi16(a3, p1); //p3 + p5 + p1 + (p1 >> 1) +;;; +;;; r0 = _mm_srai_epi16(a3, 2); // a3>>2 +;;; b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2); +;;; r0 = _mm_srai_epi16(a2, 2); // a2>>2 +;;; b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2); +;;; a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe +;;; b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2); +;;; a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe +;;; b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2); +;;; +;;; p0 = _mm_add_epi16(b0, b7); // b0 + b7; +;;; p1 = _mm_sub_epi16(b2, b5); // b2 - b5; +;;; p2 = _mm_add_epi16(b4, b3); // b4 + b3; +;;; p3 = _mm_add_epi16(b6, b1); // b6 + b1; + + movdqa xmm2, XMMWORD PTR [esp+96] + paddw xmm0, xmm6 + psraw xmm6, 1 + paddw xmm0, xmm6 + movdqa xmm7, xmm0 + movdqa xmm6, xmm5 + psraw xmm7, 2 + paddw xmm7, xmm3 + psraw xmm6, 2 + paddw xmm6, xmm1 + psraw xmm1, 2 + psubw xmm5, xmm1 + movdqa xmm1, xmm4 + psraw xmm3, 2 + psubw xmm0, xmm3 + movdqa xmm3, XMMWORD PTR [esp+80] + movdqa XMMWORD PTR [esp+32], xmm0 + +;;; p4 = _mm_sub_epi16(b6, b1); // b6 - b1; +;;; p5 = _mm_sub_epi16(b4, b3); // b4 - b3; +;;; p6 = _mm_add_epi16(b2, b5); // b2 + b5; +;;; p7 = _mm_sub_epi16(b0, b7); // b0 - b7; + + psubw xmm4, XMMWORD PTR [esp+32] + paddw xmm1, xmm0 + movdqa XMMWORD PTR [esp+112], xmm1 + movdqa xmm1, XMMWORD PTR [esp+48] + movdqa xmm0, xmm1 + psubw xmm0, xmm5 + movdqa XMMWORD PTR [esp+16], xmm0 + movdqa xmm0, xmm3 + paddw xmm0, xmm6 + psubw xmm3, xmm6 + movdqa XMMWORD PTR [esp+128], xmm0 + +;;; +;;; /* rotate 
8x8 (ugh) */ +;;; r0 = _mm_unpacklo_epi16(p0, p2); + + movdqa xmm6, XMMWORD PTR [esp+128] + movdqa xmm0, xmm2 + paddw xmm0, xmm7 + psubw xmm2, xmm7 + paddw xmm1, xmm5 + movdqa xmm5, XMMWORD PTR [esp+112] + movdqa XMMWORD PTR [esp+144], xmm4 + movdqa xmm4, xmm5 + punpcklwd xmm4, xmm6 + +;;; r1 = _mm_unpacklo_epi16(p1, p3); +;;; r2 = _mm_unpackhi_epi16(p0, p2); + + punpckhwd xmm5, xmm6 + +;;; r3 = _mm_unpackhi_epi16(p1, p3); +;;; r4 = _mm_unpacklo_epi16(p4, p6); +;;; r5 = _mm_unpacklo_epi16(p5, p7); + + movdqa xmm6, xmm3 + movdqa XMMWORD PTR [esp+64], xmm4 + movdqa xmm4, XMMWORD PTR [esp+16] + movdqa xmm7, xmm4 + punpcklwd xmm7, xmm0 + punpckhwd xmm4, xmm0 + movdqa xmm0, xmm2 + punpcklwd xmm0, xmm1 + movdqa XMMWORD PTR [esp+128], xmm0 + movdqa xmm0, XMMWORD PTR [esp+144] + punpcklwd xmm6, xmm0 + +;;; r6 = _mm_unpackhi_epi16(p4, p6); + + punpckhwd xmm2, xmm1 + +;;; r7 = _mm_unpackhi_epi16(p5, p7); +;;; +;;; b0 = _mm_unpacklo_epi16(r0, r1); + + movdqa xmm1, XMMWORD PTR [esp+64] + punpckhwd xmm3, xmm0 + movdqa xmm0, xmm1 + punpcklwd xmm0, xmm7 + +;;; b1 = _mm_unpackhi_epi16(r0, r1); + + punpckhwd xmm1, xmm7 + +;;; b2 = _mm_unpacklo_epi16(r2, r3); + + movdqa xmm7, xmm5 + punpcklwd xmm7, xmm4 + +;;; b3 = _mm_unpackhi_epi16(r2, r3); + + punpckhwd xmm5, xmm4 + movdqa XMMWORD PTR [esp+112], xmm5 + +;;; b4 = _mm_unpacklo_epi16(r4, r5); + + movdqa xmm5, XMMWORD PTR [esp+128] + movdqa xmm4, xmm5 + punpcklwd xmm4, xmm6 + +;;; b5 = _mm_unpackhi_epi16(r4, r5); + + punpckhwd xmm5, xmm6 + +;;; b6 = _mm_unpacklo_epi16(r6, r7); + + movdqa xmm6, xmm2 + punpcklwd xmm6, xmm3 + +;;; b7 = _mm_unpackhi_epi16(r6, r7); + + punpckhwd xmm2, xmm3 + +;;; +;;; p0 = _mm_unpacklo_epi64(b0, b4); + + movdqa xmm3, xmm0 + punpcklqdq xmm3, xmm4 + +;;; p1 = _mm_unpackhi_epi64(b0, b4); + + punpckhqdq xmm0, xmm4 + movdqa XMMWORD PTR [esp+144], xmm0 + +;;; p2 = _mm_unpacklo_epi64(b1, b5); +;;; p3 = _mm_unpackhi_epi64(b1, b5); +;;; p4 = _mm_unpacklo_epi64(b2, b6); +;;; p5 = _mm_unpackhi_epi64(b2, b6); +;;; p6 
= _mm_unpacklo_epi64(b3, b7); + + movdqa xmm0, XMMWORD PTR [esp+112] + movdqa xmm4, xmm1 + punpcklqdq xmm4, xmm5 + punpckhqdq xmm1, xmm5 + movdqa XMMWORD PTR [esp+64], xmm1 + movdqa xmm1, xmm7 + movdqa xmm5, xmm0 + punpcklqdq xmm1, xmm6 + punpckhqdq xmm7, xmm6 + +;;; p7 = _mm_unpackhi_epi64(b3, b7); +;;; +;;; +;;; /* Vertical */ +;;; +;;; a0 = _mm_add_epi16(p0, p4); // p0 + p4 +;;; a1 = _mm_sub_epi16(p0, p4); // p0 - p4 +;;; r0 = _mm_srai_epi16(p2, 1); // p2 >> 1 + + movdqa xmm6, xmm4 + psraw xmm6, 1 + punpcklqdq xmm5, xmm2 + punpckhqdq xmm0, xmm2 + movdqa xmm2, xmm3 + paddw xmm2, xmm1 + psubw xmm3, xmm1 + +;;; a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1) + + movdqa xmm1, xmm5 + +;;; r0 = _mm_srai_epi16(p6, 1); // p6 >> 1 + + psraw xmm5, 1 + psubw xmm1, xmm6 + +;;; a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1) + + paddw xmm4, xmm5 + +;;; +;;; b0 = _mm_add_epi16(a0, a3); // a0 + a3; + + movdqa xmm5, xmm2 + +;;; b2 = _mm_sub_epi16(a1, a2); // a1 - a2; + + movdqa xmm6, xmm3 + paddw xmm5, xmm4 + psubw xmm6, xmm1 + movdqa XMMWORD PTR [esp+128], xmm6 + +;;; b4 = _mm_add_epi16(a1, a2); // a1 + a2; +;;; b6 = _mm_sub_epi16(a0, a3); // a0 - a3; +;;; +;;; //-p3 + p5 - p7 - (p7 >> 1); +;;; r0 = _mm_srai_epi16(p7, 1); // p7 >> 1 +;;; a0 = _mm_sub_epi16(p5, p3); // p5 - p3 + + movdqa xmm6, XMMWORD PTR [esp+64] + paddw xmm3, xmm1 + movdqa XMMWORD PTR [esp+80], xmm3 + psubw xmm2, xmm4 + movdqa xmm1, xmm0 + psraw xmm1, 1 + movdqa xmm3, xmm7 + movdqa XMMWORD PTR [esp+96], xmm2 + psubw xmm3, xmm6 + +;;; a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7 + + psubw xmm3, xmm0 + +;;; a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1) +;;; +;;; //p1 + p7 - p3 - (p3 >> 1); +;;; r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1) + + movdqa xmm2, xmm6 + psraw xmm2, 1 + psubw xmm3, xmm1 + +;;; a1 = _mm_add_epi16(p1, p7); // p1 + p7 + + movdqa xmm1, XMMWORD PTR [esp+144] + movdqa xmm4, xmm1 + paddw xmm4, xmm0 + +;;; a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3 + + psubw xmm4, xmm6 + +;;; a1 = 
_mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1) + + psubw xmm4, xmm2 + +;;; +;;; // -p1 + p7 + p5 + (p5 >> 1); +;;; r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1) + + movdqa xmm2, xmm7 + psraw xmm2, 1 + +;;; a2 = _mm_sub_epi16(p7, p1); // p7 - p1 + + psubw xmm0, xmm1 + +;;; a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5 + + paddw xmm0, xmm7 + +;;; a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1) + + paddw xmm0, xmm2 + +;;; +;;; // p3 + p5 + p1 + (p1 >> 1); +;;; r0 = _mm_srai_epi16(p1, 1); // p1 >> 1 + + movdqa xmm2, xmm1 + psraw xmm2, 1 + +;;; a3 = _mm_add_epi16(p3, p5); // p3+p5 + + paddw xmm6, xmm7 + +;;; a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1 +;;; a3 = _mm_add_epi16(a3, r0); //p3 + p5 + p1 + (p1 >> 1) +;;; +;;; r0 = _mm_srai_epi16(a3, 2); // a3>>2 +;;; b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2); +;;; r0 = _mm_srai_epi16(a2, 2); // a2>>2 +;;; b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2); +;;; a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe +;;; b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2); +;;; a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe +;;; b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2); +;;; +;;; r0 = _mm_add_epi16(b0, b7); // b0 + b7; +;;; r1 = _mm_sub_epi16(b2, b5); // b2 - b5; + + movdqa xmm7, XMMWORD PTR [esp+128] + paddw xmm6, xmm1 + paddw xmm6, xmm2 + movdqa xmm1, xmm6 + psraw xmm1, 2 + movdqa xmm2, xmm0 + paddw xmm1, xmm3 + psraw xmm2, 2 + paddw xmm2, xmm4 + psraw xmm4, 2 + psubw xmm0, xmm4 + psraw xmm3, 2 + psubw xmm6, xmm3 + movdqa XMMWORD PTR [esp+64], xmm6 + movdqa xmm3, xmm5 + +;;; r2 = _mm_add_epi16(b4, b3); // b4 + b3; +;;; r3 = _mm_add_epi16(b6, b1); // b6 + b1; +;;; r4 = _mm_sub_epi16(b6, b1); // b6 - b1; +;;; r5 = _mm_sub_epi16(b4, b3); // b4 - b3; +;;; r6 = _mm_add_epi16(b2, b5); // b2 + b5; +;;; r7 = _mm_sub_epi16(b0, b7); // b0 - b7; + + psubw xmm5, XMMWORD PTR [esp+64] + paddw xmm3, xmm6 + movdqa XMMWORD PTR [esp+144], xmm3 + movdqa xmm3, xmm7 + psubw xmm3, xmm0 + movdqa XMMWORD PTR [esp+48], xmm3 + 
movdqa xmm3, XMMWORD PTR [esp+80] + movdqa xmm4, xmm3 + paddw xmm4, xmm2 + psubw xmm3, xmm2 + +;;; +;;; +;;; // add in prediction values +;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[0][pos_x])); +;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[1][pos_x])); +;;; // (x + 32) >> 6 +;;; r0 = _mm_adds_epi16(r0, const32); + + movdqa xmm2, XMMWORD PTR const32 + movdqa XMMWORD PTR [esp+16], xmm4 + movdqa xmm4, XMMWORD PTR [esp+96] + movdqa xmm6, xmm4 + paddw xmm6, xmm1 + psubw xmm4, xmm1 + +;;; r0 = _mm_srai_epi16(r0, 6); +;;; r1 = _mm_adds_epi16(r1, const32); + + movdqa xmm1, XMMWORD PTR [esp+48] + paddw xmm7, xmm0 + movdqa xmm0, XMMWORD PTR [esp+144] + movdqa XMMWORD PTR [esp+128], xmm7 + +;;; r1 = _mm_srai_epi16(r1, 6); +;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short +;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short + + movq xmm7, QWORD PTR [edx+16] + movdqa XMMWORD PTR [esp+32], xmm5 + paddsw xmm0, xmm2 + psraw xmm0, 6 + paddsw xmm1, xmm2 + pxor xmm2, xmm2 + punpcklbw xmm7, xmm2 + movq xmm5, QWORD PTR [edx] + punpcklbw xmm5, xmm2 + psraw xmm1, 6 + +;;; pred0 = _mm_adds_epi16(pred0, r0); +;;; pred1 = _mm_adds_epi16(pred1, r1); + + paddsw xmm7, xmm1 + paddsw xmm5, xmm0 + +;;; +;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char + + packuswb xmm5, xmm7 + +;;; +;;; // store +;;; _mm_storel_epi64((__m128i *)(&mb_rec[0][pos_x]), pred0); + + movdqa xmm0, XMMWORD PTR [esp+32] + movdqa xmm2, XMMWORD PTR [esp+128] + movq QWORD PTR [ecx], xmm5 + +;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily. 
+;;; pred0 = _mm_srli_si128(pred0, 8); + + psrldq xmm5, 8 + +;;; _mm_storel_epi64((__m128i *)(&mb_rec[1][pos_x]), pred0); + + movq QWORD PTR [ecx+16], xmm5 + +;;; +;;; /* --- */ +;;; +;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[2][pos_x])); + + movq xmm1, QWORD PTR [edx+32] + +;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[3][pos_x])); +;;; // (x + 32) >> 6 +;;; r2 = _mm_adds_epi16(r2, const32); + + movdqa xmm5, XMMWORD PTR [esp] + movdqa XMMWORD PTR [esp+32], xmm0 ; + +;;; r2 = _mm_srai_epi16(r2, 6); +;;; r3 = _mm_adds_epi16(r3, const32); + + paddsw xmm6, xmm5 + +;;; r3 = _mm_srai_epi16(r3, 6); + + psraw xmm6, 6 + +;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short + + pxor xmm7, xmm7 + punpcklbw xmm1, xmm7 + movdqa xmm0, XMMWORD PTR [esp+16] + paddsw xmm0, xmm5 + psraw xmm0, 6 + +;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short +;;; pred0 = _mm_adds_epi16(pred0, r2); + + paddsw xmm1, xmm0 + +;;; pred1 = _mm_adds_epi16(pred1, r3); +;;; +;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char +;;; +;;; // store +;;; _mm_storel_epi64((__m128i *)(&mb_rec[2][pos_x]), pred0); + + movdqa xmm0, XMMWORD PTR [esp+32] + movq xmm5, QWORD PTR [edx+48] + punpcklbw xmm5, xmm7 + paddsw xmm5, xmm6 + packuswb xmm1, xmm5 + movq QWORD PTR [ecx+32], xmm1 + +;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily. 
+;;; pred0 = _mm_srli_si128(pred0, 8); + + psrldq xmm1, 8 + +;;; _mm_storel_epi64((__m128i *)(&mb_rec[3][pos_x]), pred0); + + movq QWORD PTR [ecx+48], xmm1 + +;;; +;;; /* --- */ +;;; +;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[4][pos_x])); + + movq xmm7, QWORD PTR [edx+64] + +;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[5][pos_x])); + + movq xmm6, QWORD PTR [edx+80] + +;;; // (x + 32) >> 6 +;;; r4 = _mm_adds_epi16(r4, const32); +;;; r4 = _mm_srai_epi16(r4, 6); +;;; r5 = _mm_adds_epi16(r5, const32); +;;; r5 = _mm_srai_epi16(r5, 6); +;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short + + pxor xmm5, xmm5 + punpcklbw xmm7, xmm5 + +;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short + + punpcklbw xmm6, xmm5 + movdqa xmm1, XMMWORD PTR [esp] + paddsw xmm4, xmm1 + psraw xmm4, 6 + paddsw xmm3, xmm1 + psraw xmm3, 6 + +;;; pred0 = _mm_adds_epi16(pred0, r4); + + paddsw xmm7, xmm4 + +;;; pred1 = _mm_adds_epi16(pred1, r5); + + paddsw xmm6, xmm3 + +;;; +;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char + + packuswb xmm7, xmm6 + +;;; +;;; // store +;;; _mm_storel_epi64((__m128i *)(&mb_rec[4][pos_x]), pred0); + + movq QWORD PTR [ecx+64], xmm7 + +;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily. 
+;;; pred0 = _mm_srli_si128(pred0, 8); + + psrldq xmm7, 8 + +;;; _mm_storel_epi64((__m128i *)(&mb_rec[5][pos_x]), pred0); + + movq QWORD PTR [ecx+80], xmm7 + + +;;; +;;; /* --- */ +;;; +;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[6][pos_x])); + + movq xmm5, QWORD PTR [edx+96] + +;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[7][pos_x])); + + movq xmm4, QWORD PTR [edx+112] + +;;; // (x + 32) >> 6 +;;; r6 = _mm_adds_epi16(r6, const32); +;;; r6 = _mm_srai_epi16(r6, 6); +;;; r7 = _mm_adds_epi16(r7, const32); +;;; r7 = _mm_srai_epi16(r7, 6); +;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short + + pxor xmm3, xmm3 + punpcklbw xmm5, xmm3 + +;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short + + punpcklbw xmm4, xmm3 + movdqa xmm1, XMMWORD PTR [esp] + paddsw xmm2, xmm1 + psraw xmm2, 6 + paddsw xmm0, xmm1 + psraw xmm0, 6 + +;;; pred0 = _mm_adds_epi16(pred0, r6); + + paddsw xmm5, xmm2 + +;;; pred1 = _mm_adds_epi16(pred1, r7); + + paddsw xmm4, xmm0 + +;;; +;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char + + packuswb xmm5, xmm4 + +;;; +;;; // store +;;; _mm_storel_epi64((__m128i *)&mb_rec[6][pos_x], pred0); + + movq QWORD PTR [ecx+96], xmm5 + +;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily. +;;; pred0 = _mm_srli_si128(pred0, 8); + + psrldq xmm5, 8 + +;;; _mm_storel_epi64((__m128i *)&mb_rec[7][pos_x], pred0); + + movq QWORD PTR [ecx+112], xmm5 + mov esp, ebp + pop ebp + ret + ALIGN 2 +_itrans8x8_sse2 ENDP + + +END
\ No newline at end of file diff --git a/Src/h264dec/ldecod/src/quant.c b/Src/h264dec/ldecod/src/quant.c new file mode 100644 index 00000000..2f01c34a --- /dev/null +++ b/Src/h264dec/ldecod/src/quant.c @@ -0,0 +1,338 @@ + +/*! +*********************************************************************** +* \file +* quant.c +* +* \brief +* Quantization functions +* +* \author +* Main contributors (see contributors.h for copyright, address and affiliation details) +* +*********************************************************************** +*/ + +#include "contributors.h" + +#include "global.h" +#include "memalloc.h" +#include "block.h" +#include "image.h" +#include "mb_access.h" +#include "transform.h" +#include "quant.h" + +int quant_intra_default[16] = { + 6,13,20,28, + 13,20,28,32, + 20,28,32,37, + 28,32,37,42 +}; + +int quant_inter_default[16] = { + 10,14,20,24, + 14,20,24,27, + 20,24,27,30, + 24,27,30,34 +}; + +int quant8_intra_default[64] = { + 6,10,13,16,18,23,25,27, + 10,11,16,18,23,25,27,29, + 13,16,18,23,25,27,29,31, + 16,18,23,25,27,29,31,33, + 18,23,25,27,29,31,33,36, + 23,25,27,29,31,33,36,38, + 25,27,29,31,33,36,38,40, + 27,29,31,33,36,38,40,42 +}; + +int quant8_inter_default[64] = { + 9,13,15,17,19,21,22,24, + 13,13,17,19,21,22,24,25, + 15,17,19,21,22,24,25,27, + 17,19,21,22,24,25,27,28, + 19,21,22,24,25,27,28,30, + 21,22,24,25,27,28,30,32, + 22,24,25,27,28,30,32,33, + 24,25,27,28,30,32,33,35 +}; + +int quant_org[16] = { //to be used if no q matrix is chosen + 16,16,16,16, + 16,16,16,16, + 16,16,16,16, + 16,16,16,16 +}; + +int quant8_org[64] = { //to be used if no q matrix is chosen + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16 +}; + +static void CalculateQuant8x8Param(Slice *currslice); + +/*! 
+*********************************************************************** +* \brief +* Initiate quantization process arrays +*********************************************************************** +*/ +void init_qp_process(VideoParameters *p_Vid) +{ + int bitdepth_qp_scale = imax(p_Vid->bitdepth_luma_qp_scale,p_Vid->bitdepth_chroma_qp_scale); + int i; + + // We should allocate memory outside of this process since maybe we will have a change of SPS + // and we may need to recreate these. Currently should only support same bitdepth + if (p_Vid->qp_per_matrix == NULL) + if ((p_Vid->qp_per_matrix = (int*)malloc((MAX_QP + 1 + bitdepth_qp_scale)*sizeof(int))) == NULL) + no_mem_exit("init_qp_process: p_Vid->qp_per_matrix"); + + if (p_Vid->qp_rem_matrix == NULL) + if ((p_Vid->qp_rem_matrix = (int*)malloc((MAX_QP + 1 + bitdepth_qp_scale)*sizeof(int))) == NULL) + no_mem_exit("init_qp_process: p_Vid->qp_rem_matrix"); + + for (i = 0; i < MAX_QP + bitdepth_qp_scale + 1; i++) + { + p_Vid->qp_per_matrix[i] = i / 6; + p_Vid->qp_rem_matrix[i] = i % 6; + } +} + +void free_qp_matrices(VideoParameters *p_Vid) +{ + if (p_Vid->qp_per_matrix != NULL) + { + free (p_Vid->qp_per_matrix); + p_Vid->qp_per_matrix = NULL; + } + + if (p_Vid->qp_rem_matrix != NULL) + { + free (p_Vid->qp_rem_matrix); + p_Vid->qp_rem_matrix = NULL; + } +} + +/*! 
+************************************************************************ +* \brief +* For mapping the q-matrix to the active id and calculate quantisation values +* +* \param currSlice +* Slice pointer +* \param pps +* Picture parameter set +* \param sps +* Sequence parameter set +* +************************************************************************ +*/ +void assign_quant_params(Slice *currSlice) +{ + seq_parameter_set_rbsp_t* sps = currSlice->active_sps; + pic_parameter_set_rbsp_t* pps = currSlice->active_pps; + int i; + int n_ScalingList; + + if(!pps->pic_scaling_matrix_present_flag && !sps->seq_scaling_matrix_present_flag) + { + for(i=0; i<12; i++) + currSlice->qmatrix[i] = (i < 6) ? quant_org : quant8_org; + } + else + { + n_ScalingList = (sps->chroma_format_idc != YUV444) ? 8 : 12; + if(sps->seq_scaling_matrix_present_flag) // check sps first + { + for(i=0; i<n_ScalingList; i++) + { + if(i<6) + { + if(!sps->seq_scaling_list_present_flag[i]) // fall-back rule A + { + if(i==0) + currSlice->qmatrix[i] = quant_intra_default; + else if(i==3) + currSlice->qmatrix[i] = quant_inter_default; + else + currSlice->qmatrix[i] = currSlice->qmatrix[i-1]; + } + else + { + if(sps->UseDefaultScalingMatrix4x4Flag[i]) + currSlice->qmatrix[i] = (i<3) ? quant_intra_default : quant_inter_default; + else + currSlice->qmatrix[i] = sps->ScalingList4x4[i]; + } + } + else + { + if(!sps->seq_scaling_list_present_flag[i]) // fall-back rule A + { + if(i==6) + currSlice->qmatrix[i] = quant8_intra_default; + else if(i==7) + currSlice->qmatrix[i] = quant8_inter_default; + else + currSlice->qmatrix[i] = currSlice->qmatrix[i-2]; + } + else + { + if(sps->UseDefaultScalingMatrix8x8Flag[i-6]) + currSlice->qmatrix[i] = (i==6 || i==8 || i==10) ? 
quant8_intra_default:quant8_inter_default; + else + currSlice->qmatrix[i] = sps->ScalingList8x8[i-6]; + } + } + } + } + + if(pps->pic_scaling_matrix_present_flag) // then check pps + { + for(i=0; i<n_ScalingList; i++) + { + if(i<6) + { + if(!pps->pic_scaling_list_present_flag[i]) // fall-back rule B + { + if (i==0) + { + if(!sps->seq_scaling_matrix_present_flag) + currSlice->qmatrix[i] = quant_intra_default; + } + else if (i==3) + { + if(!sps->seq_scaling_matrix_present_flag) + currSlice->qmatrix[i] = quant_inter_default; + } + else + currSlice->qmatrix[i] = currSlice->qmatrix[i-1]; + } + else + { + if(pps->UseDefaultScalingMatrix4x4Flag[i]) + currSlice->qmatrix[i] = (i<3) ? quant_intra_default:quant_inter_default; + else + currSlice->qmatrix[i] = pps->ScalingList4x4[i]; + } + } + else + { + if(!pps->pic_scaling_list_present_flag[i]) // fall-back rule B + { + if (i==6) + { + if(!sps->seq_scaling_matrix_present_flag) + currSlice->qmatrix[i] = quant8_intra_default; + } + else if(i==7) + { + if(!sps->seq_scaling_matrix_present_flag) + currSlice->qmatrix[i] = quant8_inter_default; + } + else + currSlice->qmatrix[i] = currSlice->qmatrix[i-2]; + } + else + { + if(pps->UseDefaultScalingMatrix8x8Flag[i-6]) + currSlice->qmatrix[i] = (i==6 || i==8 || i==10) ? quant8_intra_default:quant8_inter_default; + else + currSlice->qmatrix[i] = pps->ScalingList8x8[i-6]; + } + } + } + } + } + + CalculateQuant4x4Param(currSlice); + if(pps->transform_8x8_mode_flag) + CalculateQuant8x8Param(currSlice); +} + +/*! 
+************************************************************************ +* \brief +* For calculating the quantisation values at frame level +* +************************************************************************ +*/ +void CalculateQuant4x4Param(Slice *currSlice) +{ + int i, j, k, temp; + + for(k=0; k<6; k++) + { + for(i=0; i<4; i++) + { + for(j=0; j<4; j++) + { + temp = (i<<2)+j; + currSlice->InvLevelScale4x4_Intra[0][k][i][j] = dequant_coef[k][i][j] * currSlice->qmatrix[0][temp]; + currSlice->InvLevelScale4x4_Intra[1][k][i][j] = dequant_coef[k][i][j] * currSlice->qmatrix[1][temp]; + currSlice->InvLevelScale4x4_Intra[2][k][i][j] = dequant_coef[k][i][j] * currSlice->qmatrix[2][temp]; + + currSlice->InvLevelScale4x4_Inter[0][k][i][j] = dequant_coef[k][i][j] * currSlice->qmatrix[3][temp]; + currSlice->InvLevelScale4x4_Inter[1][k][i][j] = dequant_coef[k][i][j] * currSlice->qmatrix[4][temp]; + currSlice->InvLevelScale4x4_Inter[2][k][i][j] = dequant_coef[k][i][j] * currSlice->qmatrix[5][temp]; + } + } + } +} + +/*! 
+************************************************************************ +* \brief +* Calculate the quantisation and inverse quantisation parameters +* +************************************************************************ +*/ +static void CalculateQuant8x8Param(Slice *currSlice) +{ + VideoParameters *p_Vid = currSlice->p_Vid; + int i, j, k, temp; + + for(k=0; k<6; k++) + { + int x = 0; + for(i=0; i<8; i++) + { + for(j=0; j<8; j++) + { + temp = (i<<3)+j; + currSlice->InvLevelScale8x8_Intra[0][k][x] = dequant_coef8[k][x] * currSlice->qmatrix[6][temp]; + currSlice->InvLevelScale8x8_Inter[0][k][x] = dequant_coef8[k][x] * currSlice->qmatrix[7][temp]; + x++; + } + } + } + + if( p_Vid->active_sps->chroma_format_idc == YUV444 ) // 4:4:4 + { + for(k=0; k<6; k++) + { + int x=0; + for(i=0; i<8; i++) + { + for(j=0; j<8; j++) + { + temp = (i<<3)+j; + currSlice->InvLevelScale8x8_Intra[1][k][x] = dequant_coef8[k][x] * currSlice->qmatrix[8][temp]; + currSlice->InvLevelScale8x8_Inter[1][k][x] = dequant_coef8[k][x] * currSlice->qmatrix[9][temp]; + currSlice->InvLevelScale8x8_Intra[2][k][x] = dequant_coef8[k][x] * currSlice->qmatrix[10][temp]; + currSlice->InvLevelScale8x8_Inter[2][k][x] = dequant_coef8[k][x] * currSlice->qmatrix[11][temp]; + x++; + } + } + } + } +} diff --git a/Src/h264dec/ldecod/src/sei.c b/Src/h264dec/ldecod/src/sei.c new file mode 100644 index 00000000..faa4f8ec --- /dev/null +++ b/Src/h264dec/ldecod/src/sei.c @@ -0,0 +1,2132 @@ +/*! 
+ ************************************************************************ + * \file sei.c + * + * \brief + * Functions to implement SEI messages + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Dong Tian <tian@cs.tut.fi> + * - Karsten Suehring <suehring@hhi.de> + ************************************************************************ + */ + +#include "contributors.h" + +#include <math.h> +#include "global.h" +#include "memalloc.h" +#include "sei.h" +#include "vlc.h" +#include "header.h" +#include "mbuffer.h" +#include "parset.h" + + +// #define PRINT_BUFFERING_PERIOD_INFO // uncomment to print buffering period SEI info +// #define PRINT_PCITURE_TIMING_INFO // uncomment to print picture timing SEI info +// #define WRITE_MAP_IMAGE // uncomment to write spare picture map +// #define PRINT_SUBSEQUENCE_INFO // uncomment to print sub-sequence SEI info +// #define PRINT_SUBSEQUENCE_LAYER_CHAR // uncomment to print sub-sequence layer characteristics SEI info +// #define PRINT_SUBSEQUENCE_CHAR // uncomment to print sub-sequence characteristics SEI info +// #define PRINT_SCENE_INFORMATION // uncomment to print scene information SEI info +// #define PRINT_PAN_SCAN_RECT // uncomment to print pan-scan rectangle SEI info +// #define PRINT_RECOVERY_POINT // uncomment to print random access point SEI info +// #define PRINT_FILLER_PAYLOAD_INFO // uncomment to print filler payload SEI info +// #define PRINT_DEC_REF_PIC_MARKING // uncomment to print decoded picture buffer management repetition SEI info +// #define PRINT_RESERVED_INFO // uncomment to print reserved SEI info +// #define PRINT_USER_DATA_UNREGISTERED_INFO // uncomment to print unregistered user data SEI info +// #define PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO // uncomment to print ITU-T T.35 user data SEI info +// #define PRINT_FULL_FRAME_FREEZE_INFO // uncomment to print full-frame freeze SEI info +// #define PRINT_FULL_FRAME_FREEZE_RELEASE_INFO // 
uncomment to print full-frame freeze release SEI info +// #define PRINT_FULL_FRAME_SNAPSHOT_INFO // uncomment to print full-frame snapshot SEI info +// #define PRINT_PROGRESSIVE_REFINEMENT_END_INFO // uncomment to print Progressive refinement segment start SEI info +// #define PRINT_PROGRESSIVE_REFINEMENT_END_INFO // uncomment to print Progressive refinement segment end SEI info +// #define PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO // uncomment to print Motion-constrained slice group set SEI info +// #define PRINT_FILM_GRAIN_CHARACTERISTICS_INFO // uncomment to print Film grain characteristics SEI info +// #define PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO // uncomment to print deblocking filter display preference SEI info +// #define PRINT_STEREO_VIDEO_INFO_INFO // uncomment to print stereo video SEI info +// #define PRINT_TONE_MAPPING // uncomment to print tone-mapping SEI info +// #define PRINT_POST_FILTER_HINT_INFO // uncomment to print post-filter hint SEI info +/*! + ************************************************************************ + * \brief + * Interpret the SEI rbsp + * \param msg + * a pointer to the SEI message. 
+ * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void InterpretSEIMessage(byte* msg, int size, VideoParameters *p_Vid) +{ + int payload_type = 0; + int payload_size = 0; + int offset = 1; + byte tmp_byte; + + do + { + // sei_message(); + payload_type = 0; + tmp_byte = msg[offset++]; + while (tmp_byte == 0xFF) + { + payload_type += 255; + tmp_byte = msg[offset++]; + } + payload_type += tmp_byte; // this is the last byte + + payload_size = 0; + tmp_byte = msg[offset++]; + while (tmp_byte == 0xFF) + { + payload_size += 255; + tmp_byte = msg[offset++]; + } + payload_size += tmp_byte; // this is the last byte + + switch ( payload_type ) // sei_payload( type, size ); + { + case SEI_BUFFERING_PERIOD: + interpret_buffering_period_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_PIC_TIMING: + interpret_picture_timing_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_PAN_SCAN_RECT: + interpret_pan_scan_rect_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_FILLER_PAYLOAD: + interpret_filler_payload_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_USER_DATA_REGISTERED_ITU_T_T35: + interpret_user_data_registered_itu_t_t35_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_USER_DATA_UNREGISTERED: + interpret_user_data_unregistered_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_RECOVERY_POINT: + interpret_recovery_point_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_DEC_REF_PIC_MARKING_REPETITION: + interpret_dec_ref_pic_marking_repetition_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_SPARE_PIC: + interpret_spare_pic( msg+offset, payload_size, p_Vid ); + break; + case SEI_SCENE_INFO: + interpret_scene_information( msg+offset, payload_size, p_Vid ); + break; + case SEI_SUB_SEQ_INFO: + interpret_subsequence_info( msg+offset, payload_size, p_Vid ); + break; + case 
SEI_SUB_SEQ_LAYER_CHARACTERISTICS: + interpret_subsequence_layer_characteristics_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_SUB_SEQ_CHARACTERISTICS: + interpret_subsequence_characteristics_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_FULL_FRAME_FREEZE: + interpret_full_frame_freeze_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_FULL_FRAME_FREEZE_RELEASE: + interpret_full_frame_freeze_release_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_FULL_FRAME_SNAPSHOT: + interpret_full_frame_snapshot_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START: + interpret_progressive_refinement_start_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END: + interpret_progressive_refinement_end_info( msg+offset, payload_size, p_Vid ); + break; + case SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET: + interpret_motion_constrained_slice_group_set_info( msg+offset, payload_size, p_Vid ); + case SEI_FILM_GRAIN_CHARACTERISTICS: + interpret_film_grain_characteristics_info ( msg+offset, payload_size, p_Vid ); + break; + case SEI_DEBLOCKING_FILTER_DISPLAY_PREFERENCE: + interpret_deblocking_filter_display_preference_info ( msg+offset, payload_size, p_Vid ); + break; + case SEI_STEREO_VIDEO_INFO: + interpret_stereo_video_info_info ( msg+offset, payload_size, p_Vid ); + break; + case SEI_TONE_MAPPING: + interpret_tone_mapping( msg+offset, payload_size, p_Vid ); + break; + case SEI_POST_FILTER_HINTS: + interpret_post_filter_hints_info ( msg+offset, payload_size, p_Vid ); + default: + interpret_reserved_info( msg+offset, payload_size, p_Vid ); + break; + } + offset += payload_size; + + } while( msg[offset] != 0x80 ); // more_rbsp_data() msg[offset] != 0x80 + // ignore the trailing bits rbsp_trailing_bits(); + assert(msg[offset] == 0x80); // this is the trailing bits + assert( offset+1 == size ); +} + + +/*! 
+************************************************************************ +* \brief +* Interpret the spare picture SEI message +* \param payload +* a pointer that point to the sei payload +* \param size +* the size of the sei message +* \param p_Vid +* the image pointer +* +************************************************************************ +*/ +void interpret_spare_pic( byte* payload, int size, VideoParameters *p_Vid ) +{ + int i,x,y; + Bitstream* buf; + int bit0, bit1, bitc, no_bit0; + int target_frame_num = 0; + int num_spare_pics; + int delta_spare_frame_num, CandidateSpareFrameNum, SpareFrameNum = 0; + int ref_area_indicator; + + int m, n, left, right, top, bottom,directx, directy; + byte ***map; + +#ifdef WRITE_MAP_IMAGE + int symbol_size_in_bytes = p_Vid->pic_unit_bitsize_on_disk/8; + int j, k, i0, j0, tmp, kk; + char filename[20] = "map_dec.yuv"; + FILE *fp; + imgpel** Y; + static int old_pn=-1; + static int first = 1; + + printf("Spare picture SEI message\n"); +#endif + + + + assert( payload!=NULL); + assert( p_Vid!=NULL); + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + target_frame_num = ue_v("SEI: target_frame_num", buf); + +#ifdef WRITE_MAP_IMAGE + printf( "target_frame_num is %d\n", target_frame_num ); +#endif + + num_spare_pics = 1 + ue_v("SEI: num_spare_pics_minus1", buf); + +#ifdef WRITE_MAP_IMAGE + printf( "num_spare_pics is %d\n", num_spare_pics ); +#endif + + get_mem3D(&map, num_spare_pics, p_Vid->height >> 4, p_Vid->width >> 4); + + for (i=0; i<num_spare_pics; i++) + { + if (i==0) + { + CandidateSpareFrameNum = target_frame_num - 1; + if ( CandidateSpareFrameNum < 0 ) CandidateSpareFrameNum = MAX_FN - 1; + } + else + CandidateSpareFrameNum = SpareFrameNum; + + delta_spare_frame_num = ue_v("SEI: delta_spare_frame_num", buf); + + SpareFrameNum = CandidateSpareFrameNum - delta_spare_frame_num; + if( SpareFrameNum < 0 ) + SpareFrameNum = MAX_FN + SpareFrameNum; 
+ + ref_area_indicator = ue_v("SEI: ref_area_indicator", buf); + + switch ( ref_area_indicator ) + { + case 0: // The whole frame can serve as spare picture + for (y=0; y<p_Vid->height >> 4; y++) + for (x=0; x<p_Vid->width >> 4; x++) + map[i][y][x] = 0; + break; + case 1: // The map is not compressed + for (y=0; y<p_Vid->height >> 4; y++) + for (x=0; x<p_Vid->width >> 4; x++) + { + map[i][y][x] = (byte) u_1("SEI: ref_mb_indicator", buf); + } + break; + case 2: // The map is compressed + //!KS: could not check this function, description is unclear (as stated in Ed. Note) + bit0 = 0; + bit1 = 1; + bitc = bit0; + no_bit0 = -1; + + x = ( (p_Vid->width >> 4) - 1 ) / 2; + y = ( (p_Vid->height >> 4) - 1 ) / 2; + left = right = x; + top = bottom = y; + directx = 0; + directy = 1; + + for (m=0; m<p_Vid->height >> 4; m++) + for (n=0; n<p_Vid->width >> 4; n++) + { + + if (no_bit0<0) + { + no_bit0 = ue_v("SEI: zero_run_length", buf); + } + if (no_bit0>0) + map[i][y][x] = (byte) bit0; + else + map[i][y][x] = (byte) bit1; + no_bit0--; + + // go to the next mb: + if ( directx == -1 && directy == 0 ) + { + if (x > left) x--; + else if (x == 0) + { + y = bottom + 1; + bottom++; + directx = 1; + directy = 0; + } + else if (x == left) + { + x--; + left--; + directx = 0; + directy = 1; + } + } + else if ( directx == 1 && directy == 0 ) + { + if (x < right) x++; + else if (x == (p_Vid->width >> 4) - 1) + { + y = top - 1; + top--; + directx = -1; + directy = 0; + } + else if (x == right) + { + x++; + right++; + directx = 0; + directy = -1; + } + } + else if ( directx == 0 && directy == -1 ) + { + if ( y > top) y--; + else if (y == 0) + { + x = left - 1; + left--; + directx = 0; + directy = 1; + } + else if (y == top) + { + y--; + top--; + directx = -1; + directy = 0; + } + } + else if ( directx == 0 && directy == 1 ) + { + if (y < bottom) y++; + else if (y == (p_Vid->height >> 4) - 1) + { + x = right+1; + right++; + directx = 0; + directy = -1; + } + else if (y == bottom) + { + y++; + 
bottom++; + directx = 1; + directy = 0; + } + } + + + } + break; + default: + printf( "Wrong ref_area_indicator %d!\n", ref_area_indicator ); + exit(0); + break; + } + + } // end of num_spare_pics + +#ifdef WRITE_MAP_IMAGE + // begin to write map seq + if ( old_pn != p_Vid->number ) + { + old_pn = p_Vid->number; + get_mem2Dpel(&Y, p_Vid->height, p_Vid->width); + if (first) + { + fp = fopen( filename, "wb" ); + first = 0; + } + else + fp = fopen( filename, "ab" ); + assert( fp != NULL ); + for (kk=0; kk<num_spare_pics; kk++) + { + for (i=0; i < p_Vid->height >> 4; i++) + for (j=0; j < p_Vid->width >> 4; j++) + { + tmp=map[kk][i][j]==0? p_Vid->max_pel_value_comp[0] : 0; + for (i0=0; i0<16; i0++) + for (j0=0; j0<16; j0++) + Y[i*16+i0][j*16+j0]=tmp; + } + + // write the map image + for (i=0; i < p_Vid->height; i++) + for (j=0; j < p_Vid->width; j++) + fwrite(&(Y[i][j]), symbol_size_in_bytes, 1, p_out); + + for (k=0; k < 2; k++) + for (i=0; i < p_Vid->height>>1; i++) + for (j=0; j < p_Vid->width>>1; j++) + fwrite(&(p_Vid->dc_pred_value_comp[1]), symbol_size_in_bytes, 1, p_out); + } + fclose( fp ); + free_mem2Dpel( Y ); + } + // end of writing map image +#undef WRITE_MAP_IMAGE +#endif + + free_mem3D( map ); + + free(buf); +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Sub-sequence information SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_subsequence_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + Bitstream* buf; + int sub_seq_layer_num, sub_seq_id, first_ref_pic_flag, leading_non_ref_pic_flag, last_pic_flag, + sub_seq_frame_num_flag, sub_seq_frame_num; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + sub_seq_layer_num = ue_v("SEI: sub_seq_layer_num" , buf); + sub_seq_id = ue_v("SEI: sub_seq_id" , buf); + first_ref_pic_flag = u_1 ("SEI: first_ref_pic_flag" , buf); + leading_non_ref_pic_flag = u_1 ("SEI: leading_non_ref_pic_flag", buf); + last_pic_flag = u_1 ("SEI: last_pic_flag" , buf); + sub_seq_frame_num_flag = u_1 ("SEI: sub_seq_frame_num_flag" , buf); + if (sub_seq_frame_num_flag) + { + sub_seq_frame_num = ue_v("SEI: sub_seq_frame_num" , buf); + } + +#ifdef PRINT_SUBSEQUENCE_INFO + printf("Sub-sequence information SEI message\n"); + printf("sub_seq_layer_num = %d\n", sub_seq_layer_num ); + printf("sub_seq_id = %d\n", sub_seq_id); + printf("first_ref_pic_flag = %d\n", first_ref_pic_flag); + printf("leading_non_ref_pic_flag = %d\n", leading_non_ref_pic_flag); + printf("last_pic_flag = %d\n", last_pic_flag); + printf("sub_seq_frame_num_flag = %d\n", sub_seq_frame_num_flag); + if (sub_seq_frame_num_flag) + { + printf("sub_seq_frame_num = %d\n", sub_seq_frame_num); + } +#endif + + free(buf); +#ifdef PRINT_SUBSEQUENCE_INFO +#undef PRINT_SUBSEQUENCE_INFO +#endif +} + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Sub-sequence layer characteristics SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_subsequence_layer_characteristics_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + Bitstream* buf; + long num_sub_layers, accurate_statistics_flag, average_bit_rate, average_frame_rate; + int i; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + num_sub_layers = 1 + ue_v("SEI: num_sub_layers_minus1", buf); + +#ifdef PRINT_SUBSEQUENCE_LAYER_CHAR + printf("Sub-sequence layer characteristics SEI message\n"); + printf("num_sub_layers_minus1 = %d\n", num_sub_layers - 1); +#endif + + for (i=0; i<num_sub_layers; i++) + { + accurate_statistics_flag = u_1( "SEI: accurate_statistics_flag", buf); + average_bit_rate = u_v(16,"SEI: average_bit_rate" , buf); + average_frame_rate = u_v(16,"SEI: average_frame_rate" , buf); + +#ifdef PRINT_SUBSEQUENCE_LAYER_CHAR + printf("layer %d: accurate_statistics_flag = %ld \n", i, accurate_statistics_flag); + printf("layer %d: average_bit_rate = %ld \n", i, average_bit_rate); + printf("layer %d: average_frame_rate = %ld \n", i, average_frame_rate); +#endif + } + free (buf); +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Sub-sequence characteristics SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_subsequence_characteristics_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + Bitstream* buf; + int i; + int sub_seq_layer_num, sub_seq_id, duration_flag, average_rate_flag, accurate_statistics_flag; + unsigned long sub_seq_duration, average_bit_rate, average_frame_rate; + int num_referenced_subseqs, ref_sub_seq_layer_num, ref_sub_seq_id, ref_sub_seq_direction; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + sub_seq_layer_num = ue_v("SEI: sub_seq_layer_num", buf); + sub_seq_id = ue_v("SEI: sub_seq_id", buf); + duration_flag = u_1 ("SEI: duration_flag", buf); + +#ifdef PRINT_SUBSEQUENCE_CHAR + printf("Sub-sequence characteristics SEI message\n"); + printf("sub_seq_layer_num = %d\n", sub_seq_layer_num ); + printf("sub_seq_id = %d\n", sub_seq_id); + printf("duration_flag = %d\n", duration_flag); +#endif + + if ( duration_flag ) + { + sub_seq_duration = u_v (32, "SEI: duration_flag", buf); +#ifdef PRINT_SUBSEQUENCE_CHAR + printf("sub_seq_duration = %ld\n", sub_seq_duration); +#endif + } + + average_rate_flag = u_1 ("SEI: average_rate_flag", buf); + +#ifdef PRINT_SUBSEQUENCE_CHAR + printf("average_rate_flag = %d\n", average_rate_flag); +#endif + + if ( average_rate_flag ) + { + accurate_statistics_flag = u_1 ( "SEI: accurate_statistics_flag", buf); + average_bit_rate = u_v (16, "SEI: average_bit_rate", buf); + average_frame_rate = u_v (16, "SEI: average_frame_rate", buf); + +#ifdef PRINT_SUBSEQUENCE_CHAR + printf("accurate_statistics_flag = %d\n", accurate_statistics_flag); + 
printf("average_bit_rate = %ld\n", average_bit_rate); + printf("average_frame_rate = %ld\n", average_frame_rate); +#endif + } + + num_referenced_subseqs = ue_v("SEI: num_referenced_subseqs", buf); + +#ifdef PRINT_SUBSEQUENCE_CHAR + printf("num_referenced_subseqs = %d\n", num_referenced_subseqs); +#endif + + for (i=0; i<num_referenced_subseqs; i++) + { + ref_sub_seq_layer_num = ue_v("SEI: ref_sub_seq_layer_num", buf); + ref_sub_seq_id = ue_v("SEI: ref_sub_seq_id", buf); + ref_sub_seq_direction = u_1 ("SEI: ref_sub_seq_direction", buf); + +#ifdef PRINT_SUBSEQUENCE_CHAR + printf("ref_sub_seq_layer_num = %d\n", ref_sub_seq_layer_num); + printf("ref_sub_seq_id = %d\n", ref_sub_seq_id); + printf("ref_sub_seq_direction = %d\n", ref_sub_seq_direction); +#endif + } + + free( buf ); +#ifdef PRINT_SUBSEQUENCE_CHAR +#undef PRINT_SUBSEQUENCE_CHAR +#endif +} + + +/*! + ************************************************************************ + * \brief + * Interpret the Scene information SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_scene_information( byte* payload, int size, VideoParameters *p_Vid ) +{ + Bitstream* buf; + int scene_id, scene_transition_type, second_scene_id; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + scene_id = ue_v("SEI: scene_id" , buf); + scene_transition_type = ue_v("SEI: scene_transition_type", buf); + if ( scene_transition_type > 3 ) + { + second_scene_id = ue_v("SEI: scene_transition_type", buf);; + } + +#ifdef PRINT_SCENE_INFORMATION + printf("Scene information SEI message\n"); + printf("scene_transition_type = %d\n", scene_transition_type); + printf("scene_id = %d\n", scene_id); + if ( scene_transition_type > 3 ) + { + printf("second_scene_id = %d\n", 
second_scene_id); + } +#endif + free( buf ); +#ifdef PRINT_SCENE_INFORMATION +#undef PRINT_SCENE_INFORMATION +#endif +} + + +/*! + ************************************************************************ + * \brief + * Interpret the Filler payload SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_filler_payload_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int payload_cnt = 0; + + while (payload_cnt<size) + { + if (payload[payload_cnt] == 0xFF) + { + payload_cnt++; + } + } + + +#ifdef PRINT_FILLER_PAYLOAD_INFO + printf("Filler payload SEI message\n"); + if (payload_cnt==size) + { + printf("read %d bytes of filler payload\n", payload_cnt); + } + else + { + printf("error reading filler payload: not all bytes are 0xFF (%d of %d)\n", payload_cnt, size); + } +#endif + +#ifdef PRINT_FILLER_PAYLOAD_INFO +#undef PRINT_FILLER_PAYLOAD_INFO +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the User data unregistered SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_user_data_unregistered_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int offset = 0; + byte payload_byte; + +#ifdef PRINT_USER_DATA_UNREGISTERED_INFO + printf("User data unregistered SEI message\n"); + printf("uuid_iso_11578 = 0x"); +#endif + assert (size>=16); + + for (offset = 0; offset < 16; offset++) + { +#ifdef PRINT_USER_DATA_UNREGISTERED_INFO + printf("%02x",payload[offset]); +#endif + } + +#ifdef PRINT_USER_DATA_UNREGISTERED_INFO + printf("\n"); +#endif + + while (offset < size) + { + payload_byte = payload[offset]; + offset ++; +#ifdef PRINT_USER_DATA_UNREGISTERED_INFO + printf("Unreg data payload_byte = %d\n", payload_byte); +#endif + } +#ifdef PRINT_USER_DATA_UNREGISTERED_INFO +#undef PRINT_USER_DATA_UNREGISTERED_INFO +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the User data registered by ITU-T T.35 SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_user_data_registered_itu_t_t35_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int offset = 0; + byte itu_t_t35_country_code, itu_t_t35_country_code_extension_byte, payload_byte; + + itu_t_t35_country_code = payload[offset]; + offset++; +#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + printf("User data registered by ITU-T T.35 SEI message\n"); + printf(" itu_t_t35_country_code = %d \n", itu_t_t35_country_code); +#endif + if(itu_t_t35_country_code == 0xFF) + { + itu_t_t35_country_code_extension_byte = payload[offset]; + offset++; +#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + printf(" ITU_T_T35_COUNTRY_CODE_EXTENSION_BYTE %d \n", itu_t_t35_country_code_extension_byte); +#endif + } + while (offset < size) + { + payload_byte = payload[offset]; + offset ++; +#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + printf("itu_t_t35 payload_byte = %d\n", payload_byte); +#endif + } +#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO +#undef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Pan scan rectangle SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_pan_scan_rect_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int pan_scan_rect_cancel_flag; + int pan_scan_cnt_minus1, i; + int pan_scan_rect_repetition_period; + int pan_scan_rect_id, pan_scan_rect_left_offset, pan_scan_rect_right_offset; + int pan_scan_rect_top_offset, pan_scan_rect_bottom_offset; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + pan_scan_rect_id = ue_v("SEI: pan_scan_rect_id", buf); + + pan_scan_rect_cancel_flag = u_1("SEI: pan_scan_rect_cancel_flag", buf); + if (!pan_scan_rect_cancel_flag) + { + pan_scan_cnt_minus1 = ue_v("SEI: pan_scan_cnt_minus1", buf); + for (i = 0; i <= pan_scan_cnt_minus1; i++) + { + pan_scan_rect_left_offset = se_v("SEI: pan_scan_rect_left_offset" , buf); + pan_scan_rect_right_offset = se_v("SEI: pan_scan_rect_right_offset" , buf); + pan_scan_rect_top_offset = se_v("SEI: pan_scan_rect_top_offset" , buf); + pan_scan_rect_bottom_offset = se_v("SEI: pan_scan_rect_bottom_offset", buf); +#ifdef PRINT_PAN_SCAN_RECT + printf("Pan scan rectangle SEI message %d/%d\n", i, pan_scan_cnt_minus1); + printf("pan_scan_rect_id = %d\n", pan_scan_rect_id); + printf("pan_scan_rect_left_offset = %d\n", pan_scan_rect_left_offset); + printf("pan_scan_rect_right_offset = %d\n", pan_scan_rect_right_offset); + printf("pan_scan_rect_top_offset = %d\n", pan_scan_rect_top_offset); + printf("pan_scan_rect_bottom_offset = %d\n", pan_scan_rect_bottom_offset); +#endif + } + pan_scan_rect_repetition_period = ue_v("SEI: pan_scan_rect_repetition_period", buf); + } + 
+ free (buf); +#ifdef PRINT_PAN_SCAN_RECT +#undef PRINT_PAN_SCAN_RECT +#endif +} + + +/*! + ************************************************************************ + * \brief + * Interpret the Random access point SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_recovery_point_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int recovery_frame_cnt, exact_match_flag, broken_link_flag, changing_slice_group_idc; + + + Bitstream* buf; + + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + recovery_frame_cnt = ue_v( "SEI: recovery_frame_cnt" , buf); + exact_match_flag = u_1 ( "SEI: exact_match_flag" , buf); + broken_link_flag = u_1 ( "SEI: broken_link_flag" , buf); + changing_slice_group_idc = u_v ( 2, "SEI: changing_slice_group_idc", buf); + + p_Vid->recovery_point = 1; + p_Vid->recovery_frame_cnt = recovery_frame_cnt; + +#ifdef PRINT_RECOVERY_POINT + printf("Recovery point SEI message\n"); + printf("recovery_frame_cnt = %d\n", recovery_frame_cnt); + printf("exact_match_flag = %d\n", exact_match_flag); + printf("broken_link_flag = %d\n", broken_link_flag); + printf("changing_slice_group_idc = %d\n", changing_slice_group_idc); +#endif + free (buf); +#ifdef PRINT_RECOVERY_POINT +#undef PRINT_RECOVERY_POINT +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Decoded Picture Buffer Management Repetition SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_dec_ref_pic_marking_repetition_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int original_idr_flag, original_frame_num; + int original_field_pic_flag, original_bottom_field_flag; + + DecRefPicMarking_t *tmp_drpm; + + DecRefPicMarking_t *old_drpm; + int old_idr_flag , old_no_output_of_prior_pics_flag, old_long_term_reference_flag , old_adaptive_ref_pic_buffering_flag; + + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + original_idr_flag = u_1 ( "SEI: original_idr_flag" , buf); + original_frame_num = ue_v( "SEI: original_frame_num" , buf); + + if ( !p_Vid->active_sps->frame_mbs_only_flag ) + { + original_field_pic_flag = u_1 ( "SEI: original_field_pic_flag", buf); + if ( original_field_pic_flag ) + { + original_bottom_field_flag = u_1 ( "SEI: original_bottom_field_flag", buf); + } + } + +#ifdef PRINT_DEC_REF_PIC_MARKING + printf("Decoded Picture Buffer Management Repetition SEI message\n"); + printf("original_idr_flag = %d\n", original_idr_flag); + printf("original_frame_num = %d\n", original_frame_num); + if ( active_sps->frame_mbs_only_flag ) + { + printf("original_field_pic_flag = %d\n", original_field_pic_flag); + if ( original_field_pic_flag ) + { + printf("original_bottom_field_flag = %d\n", original_bottom_field_flag); + } + } +#endif + + // we need to save everything that is probably overwritten in dec_ref_pic_marking() + old_drpm = p_Vid->dec_ref_pic_marking_buffer; + old_idr_flag = p_Vid->idr_flag; + + old_no_output_of_prior_pics_flag = 
p_Vid->no_output_of_prior_pics_flag; + old_long_term_reference_flag = p_Vid->long_term_reference_flag; + old_adaptive_ref_pic_buffering_flag = p_Vid->adaptive_ref_pic_buffering_flag; + + // set new initial values + p_Vid->idr_flag = original_idr_flag; + p_Vid->dec_ref_pic_marking_buffer = NULL; + + dec_ref_pic_marking(p_Vid, buf); + + // print out decoded values +#ifdef PRINT_DEC_REF_PIC_MARKING + if (p_Vid->idr_flag) + { + printf("no_output_of_prior_pics_flag = %d\n", p_Vid->no_output_of_prior_pics_flag); + printf("long_term_reference_flag = %d\n", p_Vid->long_term_reference_flag); + } + else + { + printf("adaptive_ref_pic_buffering_flag = %d\n", p_Vid->adaptive_ref_pic_buffering_flag); + if (p_Vid->adaptive_ref_pic_buffering_flag) + { + tmp_drpm=p_Vid->dec_ref_pic_marking_buffer; + while (tmp_drpm != NULL) + { + printf("memory_management_control_operation = %d\n", tmp_drpm->memory_management_control_operation); + + if ((tmp_drpm->memory_management_control_operation==1)||(tmp_drpm->memory_management_control_operation==3)) + { + printf("difference_of_pic_nums_minus1 = %d\n", tmp_drpm->difference_of_pic_nums_minus1); + } + if (tmp_drpm->memory_management_control_operation==2) + { + printf("long_term_pic_num = %d\n", tmp_drpm->long_term_pic_num); + } + if ((tmp_drpm->memory_management_control_operation==3)||(tmp_drpm->memory_management_control_operation==6)) + { + printf("long_term_frame_idx = %d\n", tmp_drpm->long_term_frame_idx); + } + if (tmp_drpm->memory_management_control_operation==4) + { + printf("max_long_term_pic_idx_plus1 = %d\n", tmp_drpm->max_long_term_frame_idx_plus1); + } + tmp_drpm = tmp_drpm->Next; + } + } + } +#endif + + while (p_Vid->dec_ref_pic_marking_buffer) + { + tmp_drpm=p_Vid->dec_ref_pic_marking_buffer; + + p_Vid->dec_ref_pic_marking_buffer=tmp_drpm->Next; + free (tmp_drpm); + } + + // restore old values in p_Vid + p_Vid->dec_ref_pic_marking_buffer = old_drpm; + p_Vid->idr_flag = old_idr_flag; + p_Vid->no_output_of_prior_pics_flag = 
old_no_output_of_prior_pics_flag; + p_Vid->long_term_reference_flag = old_long_term_reference_flag; + p_Vid->adaptive_ref_pic_buffering_flag = old_adaptive_ref_pic_buffering_flag; + + free (buf); +#ifdef PRINT_DEC_REF_PIC_MARKING +#undef PRINT_DEC_REF_PIC_MARKING +#endif +} + +/*! + ************************************************************************ + * \brief + * Interpret the Full-frame freeze SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_full_frame_freeze_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int full_frame_freeze_repetition_period; + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + full_frame_freeze_repetition_period = ue_v( "SEI: full_frame_freeze_repetition_period" , buf); + +#ifdef PRINT_FULL_FRAME_FREEZE_INFO + printf("full_frame_freeze_repetition_period = %d\n", full_frame_freeze_repetition_period); +#endif + + free (buf); +#ifdef PRINT_FULL_FRAME_FREEZE_INFO +#undef PRINT_FULL_FRAME_FREEZE_INFO +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Full-frame freeze release SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_full_frame_freeze_release_info( byte* payload, int size, VideoParameters *p_Vid ) +{ +#ifdef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO + printf("Full-frame freeze release SEI message\n"); + if (size) + { + printf("payload size of this message should be zero, but is %d bytes.\n", size); + } +#endif + +#ifdef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO +#undef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO +#endif +} + +/*! + ************************************************************************ + * \brief + * Interpret the Full-frame snapshot SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_full_frame_snapshot_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int snapshot_id; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + snapshot_id = ue_v("SEI: snapshot_id", buf); + +#ifdef PRINT_FULL_FRAME_SNAPSHOT_INFO + printf("Full-frame snapshot SEI message\n"); + printf("snapshot_id = %d\n", snapshot_id); +#endif + free (buf); +#ifdef PRINT_FULL_FRAME_SNAPSHOT_INFO +#undef PRINT_FULL_FRAME_SNAPSHOT_INFO +#endif +} + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Progressive refinement segment start SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_progressive_refinement_start_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int progressive_refinement_id, num_refinement_steps_minus1; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + progressive_refinement_id = ue_v("SEI: progressive_refinement_id" , buf); + num_refinement_steps_minus1 = ue_v("SEI: num_refinement_steps_minus1", buf); + +#ifdef PRINT_PROGRESSIVE_REFINEMENT_START_INFO + printf("Progressive refinement segment start SEI message\n"); + printf("progressive_refinement_id = %d\n", progressive_refinement_id); + printf("num_refinement_steps_minus1 = %d\n", num_refinement_steps_minus1); +#endif + free (buf); +#ifdef PRINT_PROGRESSIVE_REFINEMENT_START_INFO +#undef PRINT_PROGRESSIVE_REFINEMENT_START_INFO +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Progressive refinement segment end SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_progressive_refinement_end_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int progressive_refinement_id; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + progressive_refinement_id = ue_v("SEI: progressive_refinement_id" , buf); + +#ifdef PRINT_PROGRESSIVE_REFINEMENT_END_INFO + printf("Progressive refinement segment end SEI message\n"); + printf("progressive_refinement_id = %d\n", progressive_refinement_id); +#endif + free (buf); +#ifdef PRINT_PROGRESSIVE_REFINEMENT_END_INFO +#undef PRINT_PROGRESSIVE_REFINEMENT_END_INFO +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Motion-constrained slice group set SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_motion_constrained_slice_group_set_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int num_slice_groups_minus1, slice_group_id, exact_match_flag, pan_scan_rect_flag, pan_scan_rect_id; + int i; + int sliceGroupSize; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + num_slice_groups_minus1 = ue_v("SEI: num_slice_groups_minus1" , buf); + sliceGroupSize = CeilLog2( num_slice_groups_minus1 + 1 ); +#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("Motion-constrained slice group set SEI message\n"); + printf("num_slice_groups_minus1 = %d\n", num_slice_groups_minus1); +#endif + + for (i=0; i<=num_slice_groups_minus1;i++) + { + + slice_group_id = u_v (sliceGroupSize, "SEI: slice_group_id" , buf) ; +#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("slice_group_id = %d\n", slice_group_id); +#endif + } + + exact_match_flag = u_1("SEI: exact_match_flag" , buf); + pan_scan_rect_flag = u_1("SEI: pan_scan_rect_flag" , buf); + +#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("exact_match_flag = %d\n", exact_match_flag); + printf("pan_scan_rect_flag = %d\n", pan_scan_rect_flag); +#endif + + if (pan_scan_rect_flag) + { + pan_scan_rect_id = ue_v("SEI: pan_scan_rect_id" , buf); +#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("pan_scan_rect_id = %d\n", pan_scan_rect_id); +#endif + } + + free (buf); +#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO +#undef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO +#endif +} + +/*! 
+ ************************************************************************ + * \brief + * Interpret the film grain characteristics SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_film_grain_characteristics_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int film_grain_characteristics_cancel_flag; + int model_id, separate_colour_description_present_flag; + int film_grain_bit_depth_luma_minus8, film_grain_bit_depth_chroma_minus8, film_grain_full_range_flag, film_grain_colour_primaries, film_grain_transfer_characteristics, film_grain_matrix_coefficients; + int blending_mode_id, log2_scale_factor, comp_model_present_flag[3]; + int num_intensity_intervals_minus1, num_model_values_minus1; + int intensity_interval_lower_bound, intensity_interval_upper_bound; + int comp_model_value; + int film_grain_characteristics_repetition_period; + + int c, i, j; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + film_grain_characteristics_cancel_flag = u_1("SEI: film_grain_characteristics_cancel_flag", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("film_grain_characteristics_cancel_flag = %d\n", film_grain_characteristics_cancel_flag); +#endif + if(!film_grain_characteristics_cancel_flag) + { + + model_id = u_v(2, "SEI: model_id", buf); + separate_colour_description_present_flag = u_1("SEI: separate_colour_description_present_flag", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("model_id = %d\n", model_id); + printf("separate_colour_description_present_flag = %d\n", separate_colour_description_present_flag); +#endif + if (separate_colour_description_present_flag) + { + film_grain_bit_depth_luma_minus8 = u_v(3, "SEI: 
film_grain_bit_depth_luma_minus8", buf); + film_grain_bit_depth_chroma_minus8 = u_v(3, "SEI: film_grain_bit_depth_chroma_minus8", buf); + film_grain_full_range_flag = u_v(1, "SEI: film_grain_full_range_flag", buf); + film_grain_colour_primaries = u_v(8, "SEI: film_grain_colour_primaries", buf); + film_grain_transfer_characteristics = u_v(8, "SEI: film_grain_transfer_characteristics", buf); + film_grain_matrix_coefficients = u_v(8, "SEI: film_grain_matrix_coefficients", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("film_grain_bit_depth_luma_minus8 = %d\n", film_grain_bit_depth_luma_minus8); + printf("film_grain_bit_depth_chroma_minus8 = %d\n", film_grain_bit_depth_chroma_minus8); + printf("film_grain_full_range_flag = %d\n", film_grain_full_range_flag); + printf("film_grain_colour_primaries = %d\n", film_grain_colour_primaries); + printf("film_grain_transfer_characteristics = %d\n", film_grain_transfer_characteristics); + printf("film_grain_matrix_coefficients = %d\n", film_grain_matrix_coefficients); +#endif + } + blending_mode_id = u_v(2, "SEI: blending_mode_id", buf); + log2_scale_factor = u_v(4, "SEI: log2_scale_factor", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("blending_mode_id = %d\n", blending_mode_id); + printf("log2_scale_factor = %d\n", log2_scale_factor); +#endif + for (c = 0; c < 3; c ++) + { + comp_model_present_flag[c] = u_1("SEI: comp_model_present_flag", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("comp_model_present_flag = %d\n", comp_model_present_flag[c]); +#endif + } + for (c = 0; c < 3; c ++) + if (comp_model_present_flag[c]) + { + num_intensity_intervals_minus1 = u_v(8, "SEI: num_intensity_intervals_minus1", buf); + num_model_values_minus1 = u_v(3, "SEI: num_model_values_minus1", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("num_intensity_intervals_minus1 = %d\n", num_intensity_intervals_minus1); + printf("num_model_values_minus1 = %d\n", num_model_values_minus1); +#endif + 
for (i = 0; i <= num_intensity_intervals_minus1; i ++) + { + intensity_interval_lower_bound = u_v(8, "SEI: intensity_interval_lower_bound", buf); + intensity_interval_upper_bound = u_v(8, "SEI: intensity_interval_upper_bound", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("intensity_interval_lower_bound = %d\n", intensity_interval_lower_bound); + printf("intensity_interval_upper_bound = %d\n", intensity_interval_upper_bound); +#endif + for (j = 0; j <= num_model_values_minus1; j++) + { + comp_model_value = se_v("SEI: comp_model_value", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("comp_model_value = %d\n", comp_model_value); +#endif + } + } + } + film_grain_characteristics_repetition_period = ue_v("SEI: film_grain_characteristics_repetition_period", buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO + printf("film_grain_characteristics_repetition_period = %d\n", film_grain_characteristics_repetition_period); +#endif + } + + free (buf); +#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO +#undef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO +#endif +} + +/*! 
+ ************************************************************************ + * \brief + * Interpret the deblocking filter display preference SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_deblocking_filter_display_preference_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int deblocking_display_preference_cancel_flag; + int display_prior_to_deblocking_preferred_flag, dec_frame_buffering_constraint_flag, deblocking_display_preference_repetition_period; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + deblocking_display_preference_cancel_flag = u_1("SEI: deblocking_display_preference_cancel_flag", buf); +#ifdef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO + printf("deblocking_display_preference_cancel_flag = %d\n", deblocking_display_preference_cancel_flag); +#endif + if(!deblocking_display_preference_cancel_flag) + { + display_prior_to_deblocking_preferred_flag = u_1("SEI: display_prior_to_deblocking_preferred_flag", buf); + dec_frame_buffering_constraint_flag = u_1("SEI: dec_frame_buffering_constraint_flag", buf); + deblocking_display_preference_repetition_period = ue_v("SEI: deblocking_display_preference_repetition_period", buf); +#ifdef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO + printf("display_prior_to_deblocking_preferred_flag = %d\n", display_prior_to_deblocking_preferred_flag); + printf("dec_frame_buffering_constraint_flag = %d\n", dec_frame_buffering_constraint_flag); + printf("deblocking_display_preference_repetition_period = %d\n", deblocking_display_preference_repetition_period); +#endif + } + + free (buf); +#ifdef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO +#undef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO 
+#endif +} + +/*! + ************************************************************************ + * \brief + * Interpret the stereo video info SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_stereo_video_info_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int field_views_flags; + int top_field_is_left_view_flag, current_frame_is_left_view_flag, next_frame_is_second_view_flag; + int left_view_self_contained_flag; + int right_view_self_contained_flag; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + field_views_flags = u_1("SEI: field_views_flags", buf); +#ifdef PRINT_STEREO_VIDEO_INFO_INFO + printf("field_views_flags = %d\n", field_views_flags); +#endif + if (field_views_flags) + { + top_field_is_left_view_flag = u_1("SEI: top_field_is_left_view_flag", buf); +#ifdef PRINT_STEREO_VIDEO_INFO_INFO + printf("top_field_is_left_view_flag = %d\n", top_field_is_left_view_flag); +#endif + } + else + { + current_frame_is_left_view_flag = u_1("SEI: current_frame_is_left_view_flag", buf); + next_frame_is_second_view_flag = u_1("SEI: next_frame_is_second_view_flag", buf); +#ifdef PRINT_STEREO_VIDEO_INFO_INFO + printf("current_frame_is_left_view_flag = %d\n", current_frame_is_left_view_flag); + printf("next_frame_is_second_view_flag = %d\n", next_frame_is_second_view_flag); +#endif + } + + left_view_self_contained_flag = u_1("SEI: left_view_self_contained_flag", buf); + right_view_self_contained_flag = u_1("SEI: right_view_self_contained_flag", buf); +#ifdef PRINT_STEREO_VIDEO_INFO_INFO + printf("left_view_self_contained_flag = %d\n", left_view_self_contained_flag); + printf("right_view_self_contained_flag = %d\n", right_view_self_contained_flag); +#endif + 
+ free (buf); +#ifdef PRINT_STEREO_VIDEO_INFO_INFO +#undef PRINT_STEREO_VIDEO_INFO_INFO +#endif +} + +/*! + ************************************************************************ + * \brief + * Interpret the Reserved SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_reserved_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int offset = 0; + byte payload_byte; + +#ifdef PRINT_RESERVED_INFO + printf("Reserved SEI message\n"); +#endif + + while (offset < size) + { + payload_byte = payload[offset]; + offset ++; +#ifdef PRINT_RESERVED_INFO + printf("reserved_sei_message_payload_byte = %d\n", payload_byte); +#endif + } +#ifdef PRINT_RESERVED_INFO +#undef PRINT_RESERVED_INFO +#endif +} + + +/*! + ************************************************************************ + * \brief + * Interpret the Buffering period SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_buffering_period_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + int seq_parameter_set_id, initial_cpb_removal_delay, initial_cpb_removal_delay_offset; + unsigned int k; + + Bitstream* buf; + seq_parameter_set_rbsp_t *sps; + + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + seq_parameter_set_id = ue_v("SEI: seq_parameter_set_id" , buf); + + sps = &p_Vid->SeqParSet[seq_parameter_set_id]; + + activate_sps(p_Vid, sps); + +#ifdef PRINT_BUFFERING_PERIOD_INFO + printf("Buffering period SEI message\n"); + printf("seq_parameter_set_id = %d\n", seq_parameter_set_id); +#endif + + // Note: 
NalHrdBpPresentFlag and CpbDpbDelaysPresentFlag can also be set "by some means not specified in this Recommendation | International Standard" + if (sps->vui_parameters_present_flag) + { + + if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag) + { + for (k=0; k<sps->vui_seq_parameters.nal_hrd_parameters.cpb_cnt_minus1+1; k++) + { + initial_cpb_removal_delay = u_v(sps->vui_seq_parameters.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay" , buf); + initial_cpb_removal_delay_offset = u_v(sps->vui_seq_parameters.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay_offset" , buf); + +#ifdef PRINT_BUFFERING_PERIOD_INFO + printf("nal initial_cpb_removal_delay[%d] = %d\n", k, initial_cpb_removal_delay); + printf("nal initial_cpb_removal_delay_offset[%d] = %d\n", k, initial_cpb_removal_delay_offset); +#endif + } + } + + if (sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + for (k=0; k<sps->vui_seq_parameters.vcl_hrd_parameters.cpb_cnt_minus1+1; k++) + { + initial_cpb_removal_delay = u_v(sps->vui_seq_parameters.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay" , buf); + initial_cpb_removal_delay_offset = u_v(sps->vui_seq_parameters.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay_offset" , buf); + +#ifdef PRINT_BUFFERING_PERIOD_INFO + printf("vcl initial_cpb_removal_delay[%d] = %d\n", k, initial_cpb_removal_delay); + printf("vcl initial_cpb_removal_delay_offset[%d] = %d\n", k, initial_cpb_removal_delay_offset); +#endif + } + } + } + + free (buf); +#ifdef PRINT_BUFFERING_PERIOD_INFO +#undef PRINT_BUFFERING_PERIOD_INFO +#endif +} + + +/*! 
+ ************************************************************************ + * \brief + * Interpret the Picture timing SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_picture_timing_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + int cpb_removal_delay, dpb_output_delay, picture_structure_present_flag, picture_structure; + int clock_time_stamp_flag; + int ct_type, nuit_field_based_flag, counting_type, full_timestamp_flag, discontinuity_flag, cnt_dropped_flag, nframes; + int seconds_value, minutes_value, hours_value, seconds_flag, minutes_flag, hours_flag, time_offset; + int NumClockTs = 0; + int i; + + int cpb_removal_len = 24; + int dpb_output_len = 24; + + Boolean CpbDpbDelaysPresentFlag; + + Bitstream* buf; + + if (NULL==active_sps) + { + fprintf (stderr, "Warning: no active SPS, timing SEI cannot be parsed\n"); + return; + } + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + +#ifdef PRINT_PCITURE_TIMING_INFO + printf("Picture timing SEI message\n"); +#endif + + // CpbDpbDelaysPresentFlag can also be set "by some means not specified in this Recommendation | International Standard" + CpbDpbDelaysPresentFlag = (Boolean) (active_sps->vui_parameters_present_flag + && ( (active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag != 0) + ||(active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag != 0))); + + if (CpbDpbDelaysPresentFlag ) + { + if (active_sps->vui_parameters_present_flag) + { + if (active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag) + { + cpb_removal_len = active_sps->vui_seq_parameters.nal_hrd_parameters.cpb_removal_delay_length_minus1 + 1; + dpb_output_len = 
active_sps->vui_seq_parameters.nal_hrd_parameters.dpb_output_delay_length_minus1 + 1; + } + else if (active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + cpb_removal_len = active_sps->vui_seq_parameters.vcl_hrd_parameters.cpb_removal_delay_length_minus1 + 1; + dpb_output_len = active_sps->vui_seq_parameters.vcl_hrd_parameters.dpb_output_delay_length_minus1 + 1; + } + } + + if ((active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag)|| + (active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag)) + { + cpb_removal_delay = u_v(cpb_removal_len, "SEI: cpb_removal_delay" , buf); + dpb_output_delay = u_v(dpb_output_len, "SEI: dpb_output_delay" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("cpb_removal_delay = %d\n",cpb_removal_delay); + printf("dpb_output_delay = %d\n",dpb_output_delay); +#endif + } + } + + if (!active_sps->vui_parameters_present_flag) + { + picture_structure_present_flag = 0; + } + else + { + picture_structure_present_flag = active_sps->vui_seq_parameters.pic_struct_present_flag; + } + + if (picture_structure_present_flag) + { + picture_structure = u_v(4, "SEI: pic_struct" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("picture_structure = %d\n",picture_structure); +#endif + switch (picture_structure) + { + case 0: + case 1: + case 2: + NumClockTs = 1; + break; + case 3: + case 4: + case 7: + NumClockTs = 2; + break; + case 5: + case 6: + case 8: + NumClockTs = 3; + break; + default: + error("reserved picture_structure used (can't determine NumClockTs)", 500); + } + for (i=0; i<NumClockTs; i++) + { + clock_time_stamp_flag = u_1("SEI: clock_time_stamp_flag" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("clock_time_stamp_flag = %d\n",clock_time_stamp_flag); +#endif + if (clock_time_stamp_flag) + { + ct_type = u_v(2, "SEI: ct_type" , buf); + nuit_field_based_flag = u_1( "SEI: nuit_field_based_flag" , buf); + counting_type = u_v(5, "SEI: counting_type" , buf); + full_timestamp_flag = u_1( "SEI: 
full_timestamp_flag" , buf); + discontinuity_flag = u_1( "SEI: discontinuity_flag" , buf); + cnt_dropped_flag = u_1( "SEI: cnt_dropped_flag" , buf); + nframes = u_v(8, "SEI: nframes" , buf); + +#ifdef PRINT_PCITURE_TIMING_INFO + printf("ct_type = %d\n",ct_type); + printf("nuit_field_based_flag = %d\n",nuit_field_based_flag); + printf("full_timestamp_flag = %d\n",full_timestamp_flag); + printf("discontinuity_flag = %d\n",discontinuity_flag); + printf("cnt_dropped_flag = %d\n",cnt_dropped_flag); + printf("nframes = %d\n",nframes); +#endif + if (full_timestamp_flag) + { + seconds_value = u_v(6, "SEI: seconds_value" , buf); + minutes_value = u_v(6, "SEI: minutes_value" , buf); + hours_value = u_v(5, "SEI: hours_value" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("seconds_value = %d\n",seconds_value); + printf("minutes_value = %d\n",minutes_value); + printf("hours_value = %d\n",hours_value); +#endif + } + else + { + seconds_flag = u_1( "SEI: seconds_flag" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("seconds_flag = %d\n",seconds_flag); +#endif + if (seconds_flag) + { + seconds_value = u_v(6, "SEI: seconds_value" , buf); + minutes_flag = u_1( "SEI: minutes_flag" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("seconds_value = %d\n",seconds_value); + printf("minutes_flag = %d\n",minutes_flag); +#endif + if(minutes_flag) + { + minutes_value = u_v(6, "SEI: minutes_value" , buf); + hours_flag = u_1( "SEI: hours_flag" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("minutes_value = %d\n",minutes_value); + printf("hours_flag = %d\n",hours_flag); +#endif + if(hours_flag) + { + hours_value = u_v(5, "SEI: hours_value" , buf); +#ifdef PRINT_PCITURE_TIMING_INFO + printf("hours_value = %d\n",hours_value); +#endif + } + } + } + } + { + int time_offset_length; + if (active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + time_offset_length = active_sps->vui_seq_parameters.vcl_hrd_parameters.time_offset_length; + else if 
(active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag) + time_offset_length = active_sps->vui_seq_parameters.nal_hrd_parameters.time_offset_length; + else + time_offset_length = 24; + if (time_offset_length) + time_offset = i_v(time_offset_length, "SEI: time_offset" , buf); + else + time_offset = 0; +#ifdef PRINT_PCITURE_TIMING_INFO + printf("time_offset = %d\n",time_offset); +#endif + } + } + } + } + + free (buf); +#ifdef PRINT_PCITURE_TIMING_INFO +#undef PRINT_PCITURE_TIMING_INFO +#endif +} + +/*! + ************************************************************************ + * \brief + * Interpret the HDR tone-mapping SEI message (JVT-T060) + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +typedef struct +{ + unsigned int tone_map_id; + unsigned char tone_map_cancel_flag; + unsigned int tone_map_repetition_period; + unsigned char coded_data_bit_depth; + unsigned char sei_bit_depth; + unsigned int model_id; + // variables for model 0 + int min_value; + int max_value; + // variables for model 1 + int sigmoid_midpoint; + int sigmoid_width; + // variables for model 2 + int start_of_coded_interval[1<<MAX_SEI_BIT_DEPTH]; + // variables for model 3 + int num_pivots; + int coded_pivot_value[MAX_NUM_PIVOTS]; + int sei_pivot_value[MAX_NUM_PIVOTS]; +} tone_mapping_struct_tmp; + +void interpret_tone_mapping( byte* payload, int size, VideoParameters *p_Vid ) +{ + tone_mapping_struct_tmp seiToneMappingTmp; + Bitstream* buf; + int i = 0, max_coded_num, max_output_num; + + memset (&seiToneMappingTmp, 0, sizeof (tone_mapping_struct_tmp)); + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + seiToneMappingTmp.tone_map_id = ue_v("SEI: tone_map_id", buf); + seiToneMappingTmp.tone_map_cancel_flag = (unsigned char) 
u_1("SEI: tone_map_cancel_flag", buf); + +#ifdef PRINT_TONE_MAPPING + printf("Tone-mapping SEI message\n"); + printf("tone_map_id = %d\n", seiToneMappingTmp.tone_map_id); + + if (seiToneMappingTmp.tone_map_id != 0) + printf("WARNING! Tone_map_id != 0, print the SEI message info only. The tone mapping is actually applied only when Tone_map_id==0\n\n"); + printf("tone_map_cancel_flag = %d\n", seiToneMappingTmp.tone_map_cancel_flag); +#endif + + if (!seiToneMappingTmp.tone_map_cancel_flag) + { + seiToneMappingTmp.tone_map_repetition_period = ue_v( "SEI: tone_map_repetition_period", buf); + seiToneMappingTmp.coded_data_bit_depth = (unsigned char)u_v (8,"SEI: coded_data_bit_depth" , buf); + seiToneMappingTmp.sei_bit_depth = (unsigned char)u_v (8,"SEI: sei_bit_depth" , buf); + + seiToneMappingTmp.model_id = ue_v( "SEI: model_id" , buf); + +#ifdef PRINT_TONE_MAPPING + printf("tone_map_repetition_period = %d\n", seiToneMappingTmp.tone_map_repetition_period); + printf("coded_data_bit_depth = %d\n", seiToneMappingTmp.coded_data_bit_depth); + printf("sei_bit_depth = %d\n", seiToneMappingTmp.sei_bit_depth); + printf("model_id = %d\n", seiToneMappingTmp.model_id); +#endif + + max_coded_num = 1<<seiToneMappingTmp.coded_data_bit_depth; + max_output_num = 1<<seiToneMappingTmp.sei_bit_depth; + + if (seiToneMappingTmp.model_id == 0) + { // linear mapping with clipping + seiToneMappingTmp.min_value = u_v (32,"SEI: min_value", buf); + seiToneMappingTmp.max_value = u_v (32,"SEI: min_value", buf); +#ifdef PRINT_TONE_MAPPING + printf("min_value = %d, max_value = %d\n", seiToneMappingTmp.min_value, seiToneMappingTmp.max_value); +#endif + } + else if (seiToneMappingTmp.model_id == 1) + { // sigmoidal mapping + seiToneMappingTmp.sigmoid_midpoint = u_v (32,"SEI: sigmoid_midpoint", buf); + seiToneMappingTmp.sigmoid_width = u_v (32,"SEI: sigmoid_width", buf); +#ifdef PRINT_TONE_MAPPING + printf("sigmoid_midpoint = %d, sigmoid_width = %d\n", seiToneMappingTmp.sigmoid_midpoint, 
seiToneMappingTmp.sigmoid_width); +#endif + } + else if (seiToneMappingTmp.model_id == 2) + { // user defined table mapping + for (i=0; i<max_output_num; i++) + { + seiToneMappingTmp.start_of_coded_interval[i] = u_v((((seiToneMappingTmp.coded_data_bit_depth+7)>>3)<<3), "SEI: start_of_coded_interval" , buf); +#ifdef PRINT_TONE_MAPPING // too long to print + //printf("start_of_coded_interval[%d] = %d\n", i, seiToneMappingTmp.start_of_coded_interval[i]); +#endif + } + } + else if (seiToneMappingTmp.model_id == 3) + { // piece-wise linear mapping + seiToneMappingTmp.num_pivots = u_v (16,"SEI: num_pivots", buf); +#ifdef PRINT_TONE_MAPPING + printf("num_pivots = %d\n", seiToneMappingTmp.num_pivots); +#endif + seiToneMappingTmp.coded_pivot_value[0] = 0; + seiToneMappingTmp.sei_pivot_value[0] = 0; + seiToneMappingTmp.coded_pivot_value[seiToneMappingTmp.num_pivots+1] = max_coded_num-1; + seiToneMappingTmp.sei_pivot_value[seiToneMappingTmp.num_pivots+1] = max_output_num-1; + + for (i=1; i < seiToneMappingTmp.num_pivots+1; i++) + { + seiToneMappingTmp.coded_pivot_value[i] = u_v( (((seiToneMappingTmp.coded_data_bit_depth+7)>>3)<<3), "SEI: coded_pivot_value", buf); + seiToneMappingTmp.sei_pivot_value[i] = u_v( (((seiToneMappingTmp.sei_bit_depth+7)>>3)<<3), "SEI: sei_pivot_value", buf); +#ifdef PRINT_TONE_MAPPING + printf("coded_pivot_value[%d] = %d, sei_pivot_value[%d] = %d\n", i, seiToneMappingTmp.coded_pivot_value[i], i, seiToneMappingTmp.sei_pivot_value[i]); +#endif + } + } + +#if (ENABLE_OUTPUT_TONEMAPPING) + // Currently, only when the map_id == 0, the tone-mapping is actually applied. 
+ if (seiToneMappingTmp.tone_map_id== 0) + { + int j; + p_Vid->seiToneMapping->seiHasTone_mapping = TRUE; + p_Vid->seiToneMapping->tone_map_repetition_period = seiToneMappingTmp.tone_map_repetition_period; + p_Vid->seiToneMapping->coded_data_bit_depth = seiToneMappingTmp.coded_data_bit_depth; + p_Vid->seiToneMapping->sei_bit_depth = seiToneMappingTmp.sei_bit_depth; + p_Vid->seiToneMapping->model_id = seiToneMappingTmp.model_id; + p_Vid->seiToneMapping->count = 0; + + // generate the look up table of tone mapping + switch(seiToneMappingTmp.model_id) + { + case 0: // linear mapping with clipping + for (i=0; i<=seiToneMappingTmp.min_value; i++) + p_Vid->seiToneMapping->lut[i] = 0; + + for (i=seiToneMappingTmp.min_value+1; i < seiToneMappingTmp.max_value; i++) + p_Vid->seiToneMapping->lut[i] = (imgpel) ((i-seiToneMappingTmp.min_value) * (max_output_num-1)/(seiToneMappingTmp.max_value- seiToneMappingTmp.min_value)); + + for (i=seiToneMappingTmp.max_value; i<max_coded_num; i++) + p_Vid->seiToneMapping->lut[i] =(imgpel) (max_output_num - 1); + break; + case 1: // sigmoid mapping + + for (i=0; i < max_coded_num; i++) + { +#if 0 + int j = (int)(1 + exp( -6*(double)(i-seiToneMappingTmp.sigmoid_midpoint)/seiToneMappingTmp.sigmoid_width)); + p_Vid->seiToneMapping->lut[i] = ((max_output_num-1)+(j>>1)) / j; +#else + double tmp = 1.0 + exp( -6*(double)(i-seiToneMappingTmp.sigmoid_midpoint)/seiToneMappingTmp.sigmoid_width); + p_Vid->seiToneMapping->lut[i] = (imgpel)( (double)(max_output_num-1)/ tmp + 0.5); +#endif + } + break; + case 2: // user defined table + if (0 < max_output_num-1) + { + for (j=0; j<max_output_num-1; j++) + { + for (i=seiToneMappingTmp.start_of_coded_interval[j]; i<seiToneMappingTmp.start_of_coded_interval[j+1]; i++) + { + p_Vid->seiToneMapping->lut[i] = (imgpel) j; + } + } + p_Vid->seiToneMapping->lut[i] = (imgpel) (max_output_num - 1); + } + break; + case 3: // piecewise linear mapping + for (j=0; j<seiToneMappingTmp.num_pivots+1; j++) + { +#if 0 + slope = 
((seiToneMappingTmp.sei_pivot_value[j+1] - seiToneMappingTmp.sei_pivot_value[j])<<16)/(seiToneMappingTmp.coded_pivot_value[j+1]-seiToneMappingTmp.coded_pivot_value[j]); + for (i=seiToneMappingTmp.coded_pivot_value[j]; i <= seiToneMappingTmp.coded_pivot_value[j+1]; i++) + { + p_Vid->seiToneMapping->lut[i] = seiToneMappingTmp.sei_pivot_value[j] + (( (i - seiToneMappingTmp.coded_pivot_value[j]) * slope)>>16); + } +#else + double slope = (double)(seiToneMappingTmp.sei_pivot_value[j+1] - seiToneMappingTmp.sei_pivot_value[j])/(seiToneMappingTmp.coded_pivot_value[j+1]-seiToneMappingTmp.coded_pivot_value[j]); + for (i=seiToneMappingTmp.coded_pivot_value[j]; i <= seiToneMappingTmp.coded_pivot_value[j+1]; i++) + { + p_Vid->seiToneMapping->lut[i] = (imgpel) (seiToneMappingTmp.sei_pivot_value[j] + (int)(( (i - seiToneMappingTmp.coded_pivot_value[j]) * slope))); + } +#endif + } + break; + + default: + break; + } // end switch + } +#endif + } // end !tone_map_cancel_flag + free (buf); +} + +#if (ENABLE_OUTPUT_TONEMAPPING) +// tone map using the look-up-table generated according to SEI tone mapping message +void tone_map (imgpel** imgX, imgpel* lut, int size_x, int size_y) +{ + int i, j; + + for(i=0;i<size_y;i++) + { + for(j=0;j<size_x;j++) + { + imgX[i][j] = (imgpel)lut[imgX[i][j]]; + } + } +} + +void init_tone_mapping_sei(ToneMappingSEI *seiToneMapping) +{ + seiToneMapping->seiHasTone_mapping = FALSE; + seiToneMapping->count = 0; +} + +void update_tone_mapping_sei(ToneMappingSEI *seiToneMapping) +{ + + if(seiToneMapping->tone_map_repetition_period == 0) + { + seiToneMapping->seiHasTone_mapping = FALSE; + seiToneMapping->count = 0; + } + else if (seiToneMapping->tone_map_repetition_period>1) + { + seiToneMapping->count++; + if (seiToneMapping->count>=seiToneMapping->tone_map_repetition_period) + { + seiToneMapping->seiHasTone_mapping = FALSE; + seiToneMapping->count = 0; + } + } +} +#endif + +/*! 
+ ************************************************************************ + * \brief + * Interpret the post filter hints SEI message (JVT-U035) + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param p_Vid + * the image pointer + * + ************************************************************************ + */ +void interpret_post_filter_hints_info( byte* payload, int size, VideoParameters *p_Vid ) +{ + Bitstream* buf; + unsigned int filter_hint_size_y, filter_hint_size_x, filter_hint_type, color_component, cx, cy, additional_extension_flag; + int ***filter_hint; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + filter_hint_size_y = ue_v("SEI: filter_hint_size_y", buf); // interpret post-filter hint SEI here + filter_hint_size_x = ue_v("SEI: filter_hint_size_x", buf); // interpret post-filter hint SEI here + filter_hint_type = u_v(2, "SEI: filter_hint_type", buf); // interpret post-filter hint SEI here + + get_mem3Dint (&filter_hint, 3, filter_hint_size_y, filter_hint_size_x); + + for (color_component = 0; color_component < 3; color_component ++) + for (cy = 0; cy < filter_hint_size_y; cy ++) + for (cx = 0; cx < filter_hint_size_x; cx ++) + filter_hint[color_component][cy][cx] = se_v("SEI: filter_hint", buf); // interpret post-filter hint SEI here + + additional_extension_flag = u_1("SEI: additional_extension_flag", buf); // interpret post-filter hint SEI here + +#ifdef PRINT_POST_FILTER_HINT_INFO + printf(" Post-filter hint SEI message\n"); + printf(" post_filter_hint_size_y %d \n", filter_hint_size_y); + printf(" post_filter_hint_size_x %d \n", filter_hint_size_x); + printf(" post_filter_hint_type %d \n", filter_hint_type); + for (color_component = 0; color_component < 3; color_component ++) + for (cy = 0; cy < filter_hint_size_y; cy ++) + for (cx = 0; cx < filter_hint_size_x; cx ++) + printf(" 
post_filter_hint[%d][%d][%d] %d \n", color_component, cy, cx, filter_hint[color_component][cy][cx]); + + printf(" additional_extension_flag %d \n", additional_extension_flag); + +#undef PRINT_POST_FILTER_HINT_INFO +#endif + + free_mem3Dint (filter_hint); + free( buf ); +} diff --git a/Src/h264dec/ldecod/src/storable_picture.c b/Src/h264dec/ldecod/src/storable_picture.c new file mode 100644 index 00000000..c12b68b3 --- /dev/null +++ b/Src/h264dec/ldecod/src/storable_picture.c @@ -0,0 +1,287 @@ +#include "global.h" +#include "mbuffer.h" +#include "memalloc.h" + +static void alloc_pic_motion(VideoParameters *p_Vid, PicMotionParams *motion, int size_y, int size_x) +{ + // TODO: benski> re-use memory just like for image data + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + if (!active_sps->frame_mbs_only_flag) + { + get_mem3Dref(&(motion->field_references), 4, size_y, size_x); + } + else + { + motion->field_references = 0; // just in case + } + + if (motion_cache_dimensions_match(&p_Vid->motion_cache, size_x, size_y)) + { + motion->motion[LIST_0]=motion_cache_get(&p_Vid->motion_cache); + motion->motion[LIST_1]=motion_cache_get(&p_Vid->motion_cache); + } + if (!motion->motion[LIST_0]) + get_mem2DPicMotion(&(motion->motion[LIST_0]), size_y, size_x); + if (!motion->motion[LIST_1]) + get_mem2DPicMotion(&(motion->motion[LIST_1]), size_y, size_x); + + motion->mb_field = calloc (size_y * size_x, sizeof(byte)); + if (motion->mb_field == NULL) + no_mem_exit("alloc_storable_picture: motion->mb_field"); + + get_mem2D (&(motion->field_frame), size_y, size_x); +} + +void free_pic_motion(VideoParameters *p_Vid, PicMotionParams *motion, int size_x, int size_y) +{ + if (motion->motion[LIST_0]) + { + if (motion_cache_dimensions_match(&p_Vid->motion_cache, size_x / BLOCK_SIZE, size_y / BLOCK_SIZE)) + { + motion_cache_add(&p_Vid->motion_cache,motion->motion[LIST_0]); + motion_cache_add(&p_Vid->motion_cache,motion->motion[LIST_1]); + } + else + { + 
free_mem2DPicMotion(motion->motion[LIST_0]); + free_mem2DPicMotion(motion->motion[LIST_1]); + } + motion->motion[LIST_0] = NULL; + motion->motion[LIST_1] = NULL; + } + + if (motion->field_references) + { + free_mem3Dref(motion->field_references); + motion->field_references=0; + } + + if (motion->mb_field) + { + free(motion->mb_field); + motion->mb_field = NULL; + } + + if (motion->field_frame) + { + free_mem2D (motion->field_frame); + motion->field_frame=NULL; + } +} + + +/*! + ************************************************************************ + * \brief + * Free picture memory. + * + * \param p_Vid + * image decoding parameters for current picture + * \param p + * Picture to be freed + * + ************************************************************************ + */ +static void internal_free_storable_picture(VideoParameters *p_Vid, StorablePicture* p) +{ + int nplane; + if (p) + { + + free_pic_motion(p_Vid, &p->motion, p->size_x, p->size_y); + + //if( IS_INDEPENDENT(p_Vid) ) + { + for( nplane=0; nplane<MAX_PLANE; nplane++ ) + { + free_pic_motion(p_Vid, &p->JVmotion[nplane], p->size_x, p->size_y); + } + } + + if (image_cache_dimensions_match(&p_Vid->image_cache[0], p->size_x, p->size_y)) + image_cache_add(&p_Vid->image_cache[0], p->imgY); + else + free_memImage(p->imgY); + + if (image_cache_dimensions_match(&p_Vid->image_cache[1], p->size_x_cr, p->size_y_cr)) + image_cache_add(&p_Vid->image_cache[1], p->imgUV[0]); + else + free_memImage(p->imgUV[0]); + + if (image_cache_dimensions_match(&p_Vid->image_cache[1], p->size_x_cr, p->size_y_cr)) + image_cache_add(&p_Vid->image_cache[1], p->imgUV[1]); + else + free_memImage(p->imgUV[1]); + + if (p->slice_id) + { + free_mem2Dshort(p->slice_id); + p->slice_id=NULL; + } + + if (p->seiHasTone_mapping) + free(p->tone_mapping_lut); + + _aligned_free(p); + p = NULL; + } +} + +void free_storable_picture(VideoParameters *p_Vid, StorablePicture* p) +{ + if (p && --p->retain_count == 0) + { + 
internal_free_storable_picture(p_Vid, p); + } +} + + +/*! + ************************************************************************ + * \brief + * Allocate memory for a stored picture. + * + * \param p_Vid + * image decoding parameters for current picture + * \param structure + * picture structure + * \param size_x + * horizontal luma size + * \param size_y + * vertical luma size + * \param size_x_cr + * horizontal chroma size + * \param size_y_cr + * vertical chroma size + * + * \return + * the allocated StorablePicture structure + ************************************************************************ + */ +#define ROUNDUP32(size) (((size)+31) & ~31) + +StorablePicture* alloc_storable_picture(VideoParameters *p_Vid, PictureStructure structure, int size_x, int size_y, int size_x_cr, int size_y_cr) +{ + seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; + + StorablePicture *s; + int nplane; + + //printf ("Allocating (%s) picture (x=%d, y=%d, x_cr=%d, y_cr=%d)\n", (type == FRAME)?"FRAME":(type == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", size_x, size_y, size_x_cr, size_y_cr); + s = _aligned_malloc(sizeof(StorablePicture), 32); + if (NULL==s) + return 0; + memset(s, 0, sizeof(StorablePicture)); + + s->retain_count = 1; + s->time_code = (uint64_t)-666; + + if (structure!=FRAME) + { + size_y /= 2; + size_y_cr /= 2; + } + + s->PicSizeInMbs = (size_x*size_y)/256; + + if (image_cache_dimensions_match(&p_Vid->image_cache[0], size_x, size_y)) + s->imgY = image_cache_get(&p_Vid->image_cache[0]); + if (!s->imgY) + s->imgY = get_memImage(size_x, size_y); + + if (active_sps->chroma_format_idc != YUV400) + { + if (image_cache_dimensions_match(&p_Vid->image_cache[1], size_x_cr, size_y_cr)) + { + s->imgUV[0] = image_cache_get(&p_Vid->image_cache[1]); + s->imgUV[1] = image_cache_get(&p_Vid->image_cache[1]); + } + + if (!s->imgUV[0]) + s->imgUV[0] = get_memImage(size_x_cr, size_y); + if (!s->imgUV[1]) + s->imgUV[1] = get_memImage(size_x_cr, size_y); + } + + get_mem2Dshort 
(&(s->slice_id), size_y / MB_BLOCK_SIZE, size_x / MB_BLOCK_SIZE); + + alloc_pic_motion(p_Vid, &s->motion, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + + if( IS_INDEPENDENT(p_Vid) ) + { + for( nplane=0; nplane<MAX_PLANE; nplane++ ) + { + alloc_pic_motion(p_Vid, &s->JVmotion[nplane], size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + } + } + + s->structure=structure; + + s->size_x = size_x; + s->size_y = size_y; + s->size_x_cr = size_x_cr; + s->size_y_cr = size_y_cr; + s->size_x_m1 = size_x - 1; + s->size_y_m1 = size_y - 1; + s->size_x_cr_m1 = size_x_cr - 1; + s->size_y_cr_m1 = size_y_cr - 1; + + s->top_field = p_Vid->no_reference_picture; + s->bottom_field = p_Vid->no_reference_picture; + s->frame = p_Vid->no_reference_picture; + + return s; +} + +void out_storable_picture_add(VideoParameters *img, StorablePicture *pic) +{ + if (img->out_pictures) + { + // see if we're full + if (img->size_out_pictures == img->num_out_pictures) + { + StorablePicture *pic=0; + out_storable_picture_get(img, &pic); + if (pic) + free_storable_picture(img, pic); + } + + img->out_pictures[img->num_out_pictures++] = pic; + pic->retain_count++; + } +} + +void out_storable_picture_get(VideoParameters *img, StorablePicture **pic) +{ + *pic = 0; + if (img->out_pictures && img->num_out_pictures) + { + *pic = img->out_pictures[0]; + img->num_out_pictures--; + memmove(img->out_pictures, &img->out_pictures[1], img->num_out_pictures * sizeof(StorablePicture *)); + } +} + +void out_storable_pictures_init(VideoParameters *img, size_t count) +{ + img->out_pictures = (StorablePicture **)calloc(sizeof(StorablePicture *), count); + img->size_out_pictures = count; + img->num_out_pictures = 0; +} + +void out_storable_pictures_destroy(VideoParameters *img) +{ + size_t i=0; + while (img->num_out_pictures) + { + StorablePicture *pic=0; + out_storable_picture_get(img, &pic); + if (pic) + free_storable_picture(img, pic); + } + free(img->out_pictures); + img->out_pictures = 0; + img->size_out_pictures = 0; +} + diff 
--git a/Src/h264dec/ldecod/src/strength_horiz.c b/Src/h264dec/ldecod/src/strength_horiz.c new file mode 100644 index 00000000..bd719deb --- /dev/null +++ b/Src/h264dec/ldecod/src/strength_horiz.c @@ -0,0 +1,659 @@ +#include "global.h" +#include "image.h" +#include "mb_access.h" +#include "loopfilter.h" + +void GetStrengthNormal_Horiz(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p) +{ + // dir == 1 + PixelPos pixMB; + byte StrValue; + Macroblock *MbP; + + assert(NUM_SLICE_TYPES == 5); // the next line assumes this + if (p->slice_type>=SP_SLICE) //(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + // Set strength to either 3 or 4 regardless of pixel position + StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3; + memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte)); + } + else + { + VideoParameters *p_Vid = MbQ->p_Vid; + int yQ = edge < 16 ? edge - 1: 0; + + p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_LUMA], &pixMB); + + MbP = &(p_Vid->mb_data[pixMB.mb_addr]); + + if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM)) + { + PicMotionParams *motion = &p->motion; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int blkP, blkQ, idx; + int blk_x, blk_y ; + int posx; + + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + short mb_x, mb_y; + const int blk_y2 = pixMB.pos_y >> 2; + int cbp_pq, cbp_p, cbp_q; + + posx = pixMB.pos_x >> 2; + blkP = (pixMB.y & 0xFFFC); + blkQ = ((yQ+1) & 0xFFFC); + + cbp_p = (int)MbQ->cbp_blk[0]; + cbp_q = (int)MbP->cbp_blk[0]; + cbp_pq = (((cbp_p >> blkQ) & 0xF) | ((cbp_q >> blkP) & 0xF)); + if (cbp_pq == 0xF) + { + memset(Strength, 2, 16); + return; + //StrValue = 2; + } + + p_Vid->get_mb_block_pos (p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + mb_x <<= 2; + mb_y <<= 2; + + blk_x = mb_x + (blkQ & 3); + blk_y = mb_y + 
(blkQ >> 2); + + for( idx = 0 ; idx < MB_BLOCK_SIZE ; idx += BLOCK_SIZE, posx++, blkP++, blkQ++, blk_x++, cbp_pq>>=1) + { + if (cbp_pq & 1) + StrValue = 2; + else + { + PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + + motion_p0=&motion0[blk_y ][blk_x ]; + motion_q0=&motion0[blk_y2][posx]; + motion_p1=&motion1[blk_y ][blk_x ]; + motion_q1=&motion1[blk_y2][posx]; + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0 == ref_q0) + { + if (ref_p0 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else if (ref_p1 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - 
motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + StrValue = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + StrValue = 1; + } + } + memset(&Strength[idx], (byte) StrValue, BLOCK_SIZE * sizeof(byte)); + } + } + else + { + // Start with Strength=3. or Strength=4 for Mb-edge + StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3; + memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte)); + } + } +} + + +void GetStrength_Horiz_YUV420(byte Strength[4], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP) +{ + // dir == 1 + byte StrValue; + + assert(NUM_SLICE_TYPES == 5); // the next line assumes this + if (p->slice_type>=SP_SLICE) //(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + // Set strength to either 3 or 4 regardless of pixel position + StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3; + memset(&Strength[0], (byte) StrValue, 4 * sizeof(byte)); + } + else + { + VideoParameters *p_Vid = MbQ->p_Vid; + int yQ = edge < 16 ? 
edge - 1: 0; + + if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM)) + { + PicMotionParams *motion = &p->motion; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int blkP, blkQ, idx; + int posx; + + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + + const int blk_y2 = pixMB.pos_y >> 2; + int cbp_pq, cbp_p, cbp_q; + + blkP = (pixMB.y & 0xFFFC); + blkQ = ((yQ+1) & 0xFFFC); + + cbp_p = (int)MbQ->cbp_blk[0]; + cbp_q = (int)MbP->cbp_blk[0]; + cbp_pq = (((cbp_p >> blkQ) & 0xF) | ((cbp_q >> blkP) & 0xF)); + if (cbp_pq == 0xF) + { + memset(Strength, 2, 4); + return; + //StrValue = 2; + } + posx = pixMB.pos_x >> 2; +#ifdef _DEBUG + { + short mb_x, mb_y; + get_mb_block_pos_normal(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + assert((mb_x << 2) == posx); + assert(((mb_y << 2) + (blkQ >> 2)) == (blk_y2+1)); + } +#endif + //blk_y = mb_y + (blkQ >> 2); + + for( idx = 0 ; idx < MB_BLOCK_SIZE ; idx += BLOCK_SIZE, posx++, cbp_pq>>=1) + { + if (cbp_pq & 1) + StrValue = 2; + else + { + PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + + + motion_p0=&motion0[blk_y2+1][posx]; + motion_q0=&motion0[blk_y2][posx]; + motion_p1=&motion1[blk_y2+1][posx]; + motion_q1=&motion1[blk_y2][posx]; + + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0 == ref_q0) + { + if (ref_p0 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else if (ref_p1 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + StrValue = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + StrValue = 1; + } + } + Strength[idx/4] = StrValue; + //memset(&Strength[idx/4], (byte) StrValue, sizeof(byte)); + } + } + else + { + // Start with Strength=3. 
or Strength=4 for Mb-edge + StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3; + memset(&Strength[0], (byte) StrValue, 4 * sizeof(byte)); + } + } +} + +void GetStrengthMBAff_Horiz_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p) +{ + // dir == 1 + short blkP, blkQ, idx; + short blk_x, blk_x2, blk_y, blk_y2 ; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int xQ, yQ; + short mb_x, mb_y; + Macroblock *MbP; + + PixelPos pixP; + int dir_m1 = 0; + + PicMotionParams *motion = &p->motion; + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + yQ = (edge < MB_BLOCK_SIZE ? edge : 1); + + for( idx = 0; idx < 16; ++idx ) + { + VideoParameters *p_Vid = MbQ->p_Vid; + xQ = idx; + + getAffNeighbourPXLumaNB(MbQ, xQ , yQ - 1, &pixP); + blkQ = (short) ((yQ & 0xFFFC) + (xQ >> 2)); + blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2)); + + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field); + + if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + Strength[idx] = (edge == 0 && (((!MbP->mb_field && !MbQ->mb_field)))) ? 4 : 3; + } + else + { + // Start with Strength=3. or Strength=4 for Mb-edge + Strength[idx] = (edge == 0 && (((!MbP->mb_field && !MbQ->mb_field)))) ? 
4 : 3; + + if( !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM) + && !(MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM) ) + { + if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) ) + Strength[idx] = 2 ; + else + { + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + if (p_Vid->mixedModeEdgeFlag) + { + (Strength[idx] = 1); + } + else + { + get_mb_block_pos_mbaff(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + blk_y = (short) ((mb_y<<2) + (blkQ >> 2)); + blk_x = (short) ((mb_x<<2) + (blkQ & 3)); + blk_y2 = (short) (pixP.pos_y >> 2); + blk_x2 = (short) (pixP.pos_x >> 2); + { + PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + motion_p0=&motion0[blk_y ][blk_x ]; + motion_q0=&motion0[blk_y2][blk_x2]; + motion_p1=&motion1[blk_y ][blk_x ]; + motion_q1=&motion1[blk_y2][blk_x2]; + + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || + ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + Strength[idx]=0; + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0==ref_q0) + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + + Strength[idx] = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + Strength[idx] = 1; + } + } + } + } + } + } + } +} + +static __forceinline uint8_t GetMotionStrength(PicMotion *motion0, PicMotion *motion1, int motion_stride, int mvlimit) +{ + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + + ref_p0 = motion0[0].ref_idx < 0 ? UNDEFINED_REFERENCE : motion0[0].ref_pic_id; + ref_p1 = motion1[0].ref_idx < 0 ? UNDEFINED_REFERENCE : motion1[0].ref_pic_id; + ref_q0 = motion0[motion_stride].ref_idx < 0 ? UNDEFINED_REFERENCE : motion0[motion_stride].ref_pic_id; + ref_q1 = motion1[motion_stride].ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion1[motion_stride].ref_pic_id; + + if (ref_p0==ref_q0 && ref_p1==ref_q1) + { + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0 == UNDEFINED_REFERENCE) + { + return (byte) ( + (abs( motion1[0].mv[0] - motion1[motion_stride].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion1[motion_stride].mv[1]) >= mvlimit)); + } + else if (ref_p1 == UNDEFINED_REFERENCE) + { + return (byte) ( + (abs( motion0[0].mv[0] - motion0[motion_stride].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion0[motion_stride].mv[1]) >= mvlimit)); + } + else + { + return (byte) ( + (abs( motion0[0].mv[0] - motion0[motion_stride].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion0[motion_stride].mv[1]) >= mvlimit) || + (abs( motion1[0].mv[0] - motion1[motion_stride].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion1[motion_stride].mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + return (byte) ( + ((abs( motion0[0].mv[0] - motion0[motion_stride].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion0[motion_stride].mv[1]) >= mvlimit ) || + (abs( motion1[0].mv[0] - motion1[motion_stride].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion1[motion_stride].mv[1]) >= mvlimit)) + && + ((abs( motion0[0].mv[0] - motion1[motion_stride].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion1[motion_stride].mv[1]) >= mvlimit) || + (abs( motion1[0].mv[0] - motion0[motion_stride].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion0[motion_stride].mv[1]) >= mvlimit))); + } + } + else if (ref_p0==ref_q1 && ref_p1==ref_q0) + { + return (byte) ( + (abs( motion0[0].mv[0] - motion1[motion_stride].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion1[motion_stride].mv[1]) >= mvlimit) || + (abs( motion1[0].mv[0] - motion0[motion_stride].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion0[motion_stride].mv[1]) >= mvlimit)); + } + else + { + return 1; + } +} + + +void GetStrength_Horiz_YUV420_All(uint8_t Strength[4][4], 
Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag) +{ + // dir == 1 + assert(NUM_SLICE_TYPES == 5); // the next line assumes this + if ((p->slice_type>=SP_SLICE) //(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + || ((1 << MbQ->mb_type) & 26112)) + { + // Set strength to either 3 or 4 regardless of pixel position + *(int32_t *)(Strength[0]) = MbP?p->structure==FRAME ? 0x04040404 : 0x03030303 : 0; + *(int32_t *)(Strength[1]) = luma_transform_size_8x8_flag?0:0x03030303; + *(int32_t *)(Strength[2]) = 0x03030303; + *(int32_t *)(Strength[3]) = luma_transform_size_8x8_flag?0:0x03030303; + } + else + { + PicMotionParams *motion = &p->motion; + int motion_stride = p->size_x>>2; + PicMotion *motion0 = &motion->motion[LIST_0][pos_y-!!MbP][pos_x]; + PicMotion *motion1 = &motion->motion[LIST_1][pos_y-!!MbP][pos_x]; + + int cbp_p, cbp_q=(int)MbQ->cbp_blk[0], cbp_pq; + + // edge 0 + if (!MbP) + { + *(int32_t *)(Strength[0]) = 0; + } + else if ((1 << MbP->mb_type) & 26112) + { + *(int32_t *)(Strength[0]) = p->structure==FRAME ? 
0x04040404 : 0x03030303; + motion0 += motion_stride; + motion1 += motion_stride; + } + else + { + cbp_p=(int)MbP->cbp_blk[0]; + cbp_pq = (((cbp_p >> 12) & 0xF) | (cbp_q & 0xF)); + if (cbp_pq == 0xF) + { + memset(Strength[0], 2, 4); + } + else + { + if (cbp_pq & (1<<0)) + Strength[0][0] = 2; + else + Strength[0][0] = GetMotionStrength(&motion0[0], &motion1[0], motion_stride, mvlimit); + + if (cbp_pq & (1<<1)) + Strength[0][1] = 2; + else + Strength[0][1] = GetMotionStrength(&motion0[1], &motion1[1], motion_stride, mvlimit); + + if (cbp_pq & (1<<2)) + Strength[0][2] = 2; + else + Strength[0][2] = GetMotionStrength(&motion0[2], &motion1[2], motion_stride, mvlimit); + + if (cbp_pq & (1<<3)) + Strength[0][3] = 2; + else + Strength[0][3] = GetMotionStrength(&motion0[3], &motion1[3], motion_stride, mvlimit); + } + motion0 += motion_stride; + motion1 += motion_stride; + } + + // edge 1 + if (luma_transform_size_8x8_flag) + { + *(int32_t *)(Strength[1]) = 0; + } + else + { + cbp_pq = ((cbp_q) | (cbp_q >> 4)) & 0xF; + if (cbp_pq == 0xF) + { + memset(Strength[1], 2, 4); + } + else + { + if (cbp_pq & (1<<0)) + Strength[1][0] = 2; + else + Strength[1][0] = GetMotionStrength(&motion0[0], &motion1[0], motion_stride, mvlimit); + + if (cbp_pq & (1<<1)) + Strength[1][1] = 2; + else + Strength[1][1] = GetMotionStrength(&motion0[1], &motion1[1], motion_stride, mvlimit); + + if (cbp_pq & (1<<2)) + Strength[1][2] = 2; + else + Strength[1][2] = GetMotionStrength(&motion0[2], &motion1[2], motion_stride, mvlimit); + + if (cbp_pq & (1<<3)) + Strength[1][3] = 2; + else + Strength[1][3] = GetMotionStrength(&motion0[3], &motion1[3], motion_stride, mvlimit); + + } + } + + + motion0 += motion_stride; + motion1 += motion_stride; + // edge 2 + cbp_pq = (cbp_q | (cbp_q >> 4)) & 0xF0; + if (cbp_pq == 0xF0) + { + memset(Strength[2], 2, 4); + } + else + { + if (cbp_pq & (0x10<<0)) + Strength[2][0] = 2; + else + Strength[2][0] = GetMotionStrength(&motion0[0], &motion1[0], motion_stride, mvlimit); + + 
if (cbp_pq & (0x10<<1)) + Strength[2][1] = 2; + else + Strength[2][1] = GetMotionStrength(&motion0[1], &motion1[1], motion_stride, mvlimit); + + if (cbp_pq & (0x10<<2)) + Strength[2][2] = 2; + else + Strength[2][2] = GetMotionStrength(&motion0[2], &motion1[2], motion_stride, mvlimit); + + if (cbp_pq & (0x10<<3)) + Strength[2][3] = 2; + else + Strength[2][3] = GetMotionStrength(&motion0[3], &motion1[3], motion_stride, mvlimit); + } + + + motion0 += motion_stride; + motion1 += motion_stride; + // edge 3 + if (luma_transform_size_8x8_flag) + { + *(int32_t *)(Strength[3]) = 0; + } + else + { + cbp_pq = (cbp_q | (cbp_q >> 4)) & 0xF00; + if (cbp_pq == 0xF00) + { + memset(Strength[3], 2, 4); + } + else + { + if (cbp_pq & (0x100<<0)) + Strength[3][0] = 2; + else + Strength[3][0] = GetMotionStrength(&motion0[0], &motion1[0], motion_stride, mvlimit); + + if (cbp_pq & (0x100<<1)) + Strength[3][1] = 2; + else + Strength[3][1] = GetMotionStrength(&motion0[1], &motion1[1], motion_stride, mvlimit); + + if (cbp_pq & (0x100<<2)) + Strength[3][2] = 2; + else + Strength[3][2] = GetMotionStrength(&motion0[2], &motion1[2], motion_stride, mvlimit); + + if (cbp_pq & (0x100<<3)) + Strength[3][3] = 2; + else + Strength[3][3] = GetMotionStrength(&motion0[3], &motion1[3], motion_stride, mvlimit); + } + } + } +}
\ No newline at end of file diff --git a/Src/h264dec/ldecod/src/strength_vert.c b/Src/h264dec/ldecod/src/strength_vert.c new file mode 100644 index 00000000..89e545d4 --- /dev/null +++ b/Src/h264dec/ldecod/src/strength_vert.c @@ -0,0 +1,594 @@ +#include "global.h" +#include "image.h" +#include "mb_access.h" +#include "loopfilter.h" + +void GetStrengthNormal_Vert(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p) +{ + // dir == 0 + PixelPos pixP, pixMB; + byte StrValue; + Macroblock *MbP; + + if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + // Set strength to either 3 or 4 regardless of pixel position + StrValue = (edge == 0) ? 4 : 3; + memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte)); + } + else + { + VideoParameters *p_Vid = MbQ->p_Vid; + int xQ = edge - 1; + int yQ = 0; + + p_Vid->getNeighbourX0(MbQ, xQ, p_Vid->mb_size[IS_LUMA], &pixMB); + pixP = pixMB; + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + + if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM)) + { + PicMotionParams *motion = &p->motion; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int blkP, blkQ, idx; + int blk_x, blk_x2, blk_y, blk_y2 ; + + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + short mb_x, mb_y; + + p_Vid->get_mb_block_pos (p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + mb_x <<= 2; + mb_y <<= 2; + + xQ ++; + + for( idx = 0 ; idx < MB_BLOCK_SIZE ; idx += BLOCK_SIZE ) + { + yQ = idx; + + blkQ = (yQ & 0xFFFC) + (xQ >> 2); + blkP = (idx & 0xFFFC) + (pixP.x >> 2); + + if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) ) + StrValue = 2; + else + { + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + 
PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + blk_y = mb_y + (blkQ >> 2); + blk_x = mb_x + (blkQ & 3); + blk_y2 = (pixMB.pos_y + idx) >> 2; + blk_x2 = pixMB.pos_x >> 2; + + motion_p0=&motion0[blk_y ][blk_x ]; + motion_q0=&motion0[blk_y2][blk_x2]; + motion_p1=&motion1[blk_y ][blk_x ]; + motion_q1=&motion1[blk_y2][blk_x2]; + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0 == ref_q0) + { + if (ref_p0 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else if (ref_p1 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + StrValue = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] 
- motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + StrValue = 1; + } + } + memset(&Strength[idx], (byte) StrValue, BLOCK_SIZE * sizeof(byte)); + } + } + else + { + // Start with Strength=3. or Strength=4 for Mb-edge + StrValue = (edge == 0) ? 4 : 3; + memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte)); + } + } +} + +void GetStrength_Vert_YUV420(uint8_t Strength[4], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP) +{ + // dir == 0 + int i; + uint8_t StrValue; + + if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + // Set strength to either 3 or 4 regardless of pixel position + StrValue = (edge == 0) ? 
4 : 3; + for (i=0;i<4;i++) + { + Strength[i]=StrValue; + } + } + else + { + VideoParameters *p_Vid = MbQ->p_Vid; + if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM)) + { + PicMotionParams *motion = &p->motion; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int blkP, blkQ, idx; + int blk_x2, blk_y, blk_y2 ; + + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + short mb_x, mb_y; + const int cbp_p=(int)MbP->cbp_blk[0], cbp_q=(int)MbQ->cbp_blk[0]; + + get_mb_block_pos_normal(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + mb_x <<= 2; + mb_y <<= 2; + + mb_x += edge; + blkQ = edge; + blkP = pixMB.x >> 2; + blk_x2 = pixMB.pos_x >> 2; + + for( idx = 0 ; idx < BLOCK_SIZE ; idx++,blkQ+=BLOCK_SIZE, blkP+=BLOCK_SIZE) + { + if (_bittest(&cbp_p, blkP) || _bittest(&cbp_q, blkQ)) + StrValue = 2; + else + { + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + blk_y = mb_y + idx; + blk_y2 = (pixMB.pos_y >> 2) + idx; + + motion_p0=&motion0[blk_y ][mb_x ]; + motion_q0=&motion0[blk_y2][blk_x2]; + motion_p1=&motion1[blk_y ][mb_x ]; + motion_q1=&motion1[blk_y2][blk_x2]; + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0 == ref_q0) + { + if (ref_p0 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else if (ref_p1 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + } + else + { + StrValue = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + StrValue = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + StrValue = 1; + } + } + Strength[idx] = StrValue; + } + } + else + { + // Start with Strength=3. or Strength=4 for Mb-edge + StrValue = (edge == 0) ? 
4 : 3; + for (i=0;i<4;i++) + { + Strength[i]=StrValue; + } + } + } +} + +// assumes YUV420, MB Aff +void GetStrength_MBAff_Vert_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p) +{ + // dir == 0 + if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) + || (MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM)) + { + memset(Strength,(edge == 0) ? 4 : 3, 16); + } + else + { + short blkP, blkQ, idx; + short blk_x, blk_x2, blk_y, blk_y2 ; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + int xQ, yQ; + short mb_x, mb_y; + Macroblock *MbP; + + PixelPos pixP; + + PicMotionParams *motion = &p->motion; + PicMotion **motion0 = motion->motion[LIST_0]; + PicMotion **motion1 = motion->motion[LIST_1]; + xQ = edge; + for( idx = 0; idx < 16; ++idx ) + { + VideoParameters *p_Vid = MbQ->p_Vid; + + yQ = idx; + getAffNeighbourXPLuma(MbQ, xQ - 1, yQ, &pixP); + blkQ = (short) ((yQ & 0xC) + (xQ >> 2)); // blkQ changes once every 4 loop iterations + blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2)); + + MbP = &(p_Vid->mb_data[pixP.mb_addr]); + p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field); + + // Start with Strength=3. or Strength=4 for Mb-edge + Strength[idx] = (edge == 0) ? 
4 : 3; + + if( !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM)) + { + if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) ) + Strength[idx] = 2 ; + else + { + // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + if (p_Vid->mixedModeEdgeFlag) + { + (Strength[idx] = 1); + } + else + { + get_mb_block_pos_mbaff(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y); + blk_y = (short) ((mb_y<<2) + (blkQ >> 2)); + blk_x = (short) ((mb_x<<2) + (blkQ & 3)); + blk_y2 = (short) (pixP.pos_y >> 2); + blk_x2 = (short) (pixP.pos_x >> 2); + { + PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1; + motion_p0=&motion0[blk_y ][blk_x ]; + motion_q0=&motion0[blk_y2][blk_x2]; + motion_p1=&motion1[blk_y ][blk_x ]; + motion_q1=&motion1[blk_y2][blk_x2]; + + ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id; + ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id; + ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id; + ref_q1 = motion_q1->ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion_q1->ref_pic_id; + + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || + ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + Strength[idx]=0; + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0==ref_q0) + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)); + } + else + { + Strength[idx] = (byte) ( + (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + + Strength[idx] = (byte) ( + ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) || + (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit)) + && + ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) || + (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) || + (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) || + (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit))); + } + } + else + { + Strength[idx] = 1; + } + } + } + } + } + } + } +} + +static __forceinline uint8_t GetMotionStrength(PicMotion *motion0, PicMotion *motion1, int mvlimit) +{ + uint8_t StrValue; + h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1; + + ref_p0 = motion0[0].ref_idx < 0 ? UNDEFINED_REFERENCE : motion0[0].ref_pic_id; + ref_p1 = motion1[0].ref_idx < 0 ? UNDEFINED_REFERENCE : motion1[0].ref_pic_id; + ref_q0 = motion0[1].ref_idx < 0 ? UNDEFINED_REFERENCE : motion0[1].ref_pic_id; + ref_q1 = motion1[1].ref_idx < 0 ? 
UNDEFINED_REFERENCE : motion1[1].ref_pic_id; + + if (ref_p0==ref_q0 && ref_p1==ref_q1) + { + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion1[0].mv[0] - motion1[1].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion1[1].mv[1]) >= mvlimit)); + } + else if (ref_p1 == UNDEFINED_REFERENCE) + { + StrValue = (byte) ( + (abs( motion0[0].mv[0] - motion0[1].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion0[1].mv[1]) >= mvlimit)); + } + else + { + StrValue = (byte) ( + (abs( motion0[0].mv[0] - motion0[1].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion0[1].mv[1]) >= mvlimit) || + (abs( motion1[0].mv[0] - motion1[1].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion1[1].mv[1]) >= mvlimit)); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + StrValue = (byte) ( + ((abs( motion0[0].mv[0] - motion0[1].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion0[1].mv[1]) >= mvlimit ) || + (abs( motion1[0].mv[0] - motion1[1].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion1[1].mv[1]) >= mvlimit)) + && + ((abs( motion0[0].mv[0] - motion1[1].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion1[1].mv[1]) >= mvlimit) || + (abs( motion1[0].mv[0] - motion0[1].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion0[1].mv[1]) >= mvlimit))); + } + } + else if (ref_p0==ref_q1 && ref_p1==ref_q0) + { + StrValue = (byte) ( + (abs( motion0[0].mv[0] - motion1[1].mv[0]) >= 4) || + (abs( motion0[0].mv[1] - motion1[1].mv[1]) >= mvlimit) || + (abs( motion1[0].mv[0] - motion0[1].mv[0]) >= 4) || + (abs( motion1[0].mv[1] - motion0[1].mv[1]) >= mvlimit)); + } + else + { + StrValue = 1; + } + return StrValue; +} + +void GetStrength_Vert_YUV420_All(uint8_t Strength[4][4], Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag) +{ + // dir == 0 + if ((p->slice_type>=SP_SLICE) 
//(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + || ((1 << MbQ->mb_type) & 26112)) + { + // Set strength to either 3 or 4 regardless of pixel position + *(int32_t *)(Strength[0]) = MbP?0x04040404:0; + *(int32_t *)(Strength[1]) = luma_transform_size_8x8_flag?0:0x03030303; + *(int32_t *)(Strength[2]) = 0x03030303; + *(int32_t *)(Strength[3]) = luma_transform_size_8x8_flag?0:0x03030303; + } + else + { + PicMotionParams *motion = &p->motion; + int motion_stride = p->size_x >> 2; + PicMotion *motion0 = &motion->motion[LIST_0][pos_y][pos_x]; + PicMotion *motion1 = &motion->motion[LIST_1][pos_y][pos_x]; + int cbp_q=(int)MbQ->cbp_blk[0]; + + // edge 0 + if (!MbP) + { + *(int32_t *)(Strength[0]) = 0; + } + else if ((1 << MbP->mb_type) & 26112) + { + *(int32_t *)(Strength[0]) = 0x04040404; + } + else + { + int cbp_p = (int)MbP->cbp_blk[0]; + if( ((cbp_q & (1 << 0 )) != 0) || ((cbp_p & (1 << (3))) != 0) ) + Strength[0][0] = 2; + else + Strength[0][0] = GetMotionStrength(&motion0[0-1], &motion1[0-1], mvlimit); + + if( ((cbp_q & (1 << 4 )) != 0) || ((cbp_p & (1 << (4 + 3))) != 0) ) + Strength[0][1] = 2; + else + Strength[0][1] = GetMotionStrength(&motion0[motion_stride-1], &motion1[motion_stride-1], mvlimit); + + if( ((cbp_q & (1 << 8 )) != 0) || ((cbp_p & (1 << (8 + 3))) != 0) ) + Strength[0][2] = 2; + else + Strength[0][2] = GetMotionStrength(&motion0[2*motion_stride-1], &motion1[2*motion_stride-1], mvlimit); + + if( ((cbp_q & (1 << 12 )) != 0) || ((cbp_p & (1 << (12 + 3))) != 0) ) + Strength[0][3] = 2; + else + Strength[0][3] = GetMotionStrength(&motion0[3*motion_stride-1], &motion1[3*motion_stride-1], mvlimit); + } + + // edge 1 + if (luma_transform_size_8x8_flag) + { + *(int32_t *)(Strength[1]) = 0; + } + else + { + if (cbp_q & (3 << 0)) + Strength[1][0] = 2; + else + Strength[1][0] = GetMotionStrength(&motion0[0], &motion1[0], mvlimit); + + if (cbp_q & (3 << 4)) + Strength[1][1] = 2; + else + Strength[1][1] = GetMotionStrength(&motion0[1*motion_stride], 
&motion1[1*motion_stride], mvlimit); + + if (cbp_q & (3 << 8)) + Strength[1][2] = 2; + else + Strength[1][2] = GetMotionStrength(&motion0[2*motion_stride], &motion1[2*motion_stride], mvlimit); + + if (cbp_q & (3 << 12)) + Strength[1][3] = 2; + else + Strength[1][3] = GetMotionStrength(&motion0[3*motion_stride], &motion1[3*motion_stride], mvlimit); + } + + // edge 2 + if (cbp_q & (6 << 0)) + Strength[2][0] = 2; + else + Strength[2][0] = GetMotionStrength(&motion0[1], &motion1[1], mvlimit); + + if (cbp_q & (6 << 4)) + Strength[2][1] = 2; + else + Strength[2][1] = GetMotionStrength(&motion0[motion_stride+1], &motion1[motion_stride+1], mvlimit); + + if (cbp_q & (6 << 8)) + Strength[2][2] = 2; + else + Strength[2][2] = GetMotionStrength(&motion0[2*motion_stride+1], &motion1[2*motion_stride+1], mvlimit); + + if (cbp_q & (6 << 12)) + Strength[2][3] = 2; + else + Strength[2][3] = GetMotionStrength(&motion0[3*motion_stride+1], &motion1[3*motion_stride+1], mvlimit); + + // edge 3 + if (luma_transform_size_8x8_flag) + { + *(int32_t *)(Strength[3]) = 0; + } + else + { + if (cbp_q & (0xC << 0)) + Strength[3][0] = 2; + else + Strength[3][0] = GetMotionStrength(&motion0[2], &motion1[2], mvlimit); + + if (cbp_q & (0xC << 4)) + Strength[3][1] = 2; + else + Strength[3][1] = GetMotionStrength(&motion0[motion_stride+2], &motion1[motion_stride+2], mvlimit); + + if (cbp_q & (0xC << 8)) + Strength[3][2] = 2; + else + Strength[3][2] = GetMotionStrength(&motion0[2*motion_stride+2], &motion1[2*motion_stride+2], mvlimit); + + if (cbp_q & (0xC << 12)) + Strength[3][3] = 2; + else + Strength[3][3] = GetMotionStrength(&motion0[3*motion_stride+2], &motion1[3*motion_stride+2], mvlimit); + } + } +} diff --git a/Src/h264dec/ldecod/src/transform8x8.c b/Src/h264dec/ldecod/src/transform8x8.c new file mode 100644 index 00000000..1072a6d1 --- /dev/null +++ b/Src/h264dec/ldecod/src/transform8x8.c @@ -0,0 +1,696 @@ + +/*! 
***************************************************************************
* \file transform8x8.c
*
* \brief
*    8x8 transform functions
*
* \author
*    Main contributors (see contributors.h for copyright, address and affiliation details)
*    - Yuri Vatis
*    - Jan Muenster
*
* \date
*    12. October 2003
**************************************************************************
*/

#include "global.h"

#include "image.h"
#include "mb_access.h"
#include "elements.h"
#include "transform8x8.h"
#include "transform.h"
#include "quant.h"
#include <emmintrin.h>

/*
 * In-place 8x8 inverse transform using SSE2.
 *
 * block: eight rows of eight 16-bit values. Rows are read and written with
 *        _mm_load_si128/_mm_store_si128, so every row must be 16-byte aligned.
 *
 * The block is transposed, the 1-D butterfly is applied, the result is
 * transposed again and the same butterfly is applied a second time; the
 * result overwrites block[0..7]. The arithmetic mirrors the scalar
 * inverse8x8() below, but is carried out in 16-bit lanes.
 */
static void inverse8x8_sse2(h264_short_8x8block_row_t *block)
{
	__m128i a0, a1, a2, a3;
	__m128i p0, p1, p2, p3, p4, p5 ,p6, p7;
	__m128i b0, b1, b2, b3, b4, b5, b6, b7;
	__m128i r0, r1, r2, r3, r4, r5, r6, r7;

	// Horizontal
	b0 = _mm_load_si128((__m128i *)(block[0]));
	b1 = _mm_load_si128((__m128i *)(block[1]));
	b2 = _mm_load_si128((__m128i *)(block[2]));
	b3 = _mm_load_si128((__m128i *)(block[3]));
	b4 = _mm_load_si128((__m128i *)(block[4]));
	b5 = _mm_load_si128((__m128i *)(block[5]));
	b6 = _mm_load_si128((__m128i *)(block[6]));
	b7 = _mm_load_si128((__m128i *)(block[7]));

	/* rotate 8x8 (ugh) */
	// (the three unpack stages below transpose the block: pN ends up holding column N)
	r0 = _mm_unpacklo_epi16(b0, b2);
	r1 = _mm_unpacklo_epi16(b1, b3);
	r2 = _mm_unpackhi_epi16(b0, b2);
	r3 = _mm_unpackhi_epi16(b1, b3);
	r4 = _mm_unpacklo_epi16(b4, b6);
	r5 = _mm_unpacklo_epi16(b5, b7);
	r6 = _mm_unpackhi_epi16(b4, b6);
	r7 = _mm_unpackhi_epi16(b5, b7);

	b0 = _mm_unpacklo_epi16(r0, r1);
	b1 = _mm_unpackhi_epi16(r0, r1);
	b2 = _mm_unpacklo_epi16(r2, r3);
	b3 = _mm_unpackhi_epi16(r2, r3);
	b4 = _mm_unpacklo_epi16(r4, r5);
	b5 = _mm_unpackhi_epi16(r4, r5);
	b6 = _mm_unpacklo_epi16(r6, r7);
	b7 = _mm_unpackhi_epi16(r6, r7);

	p0 = _mm_unpacklo_epi64(b0, b4);
	p1 = _mm_unpackhi_epi64(b0, b4);
	p2 = _mm_unpacklo_epi64(b1, b5);
	p3 = _mm_unpackhi_epi64(b1, b5);
	p4 = _mm_unpacklo_epi64(b2, b6);
	p5 = _mm_unpackhi_epi64(b2, b6);
	p6 = _mm_unpacklo_epi64(b3, b7);
	p7 = _mm_unpackhi_epi64(b3, b7);

	/* perform approx DCT */
	a0 = _mm_add_epi16(p0, p4); // p0 + p4
	a1 = _mm_sub_epi16(p0, p4); // p0 - p4
	r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
	a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
	r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
	a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)

	b0 = _mm_add_epi16(a0, a3); // a0 + a3;
	b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
	b4 = _mm_add_epi16(a1, a2); // a1 + a2;
	b6 = _mm_sub_epi16(a0, a3); // a0 - a3;

	//-p3 + p5 - p7 - (p7 >> 1);
	r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
	a0 = _mm_sub_epi16(p5, p3); // p5 - p3
	a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
	a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)

	//p1 + p7 - p3 - (p3 >> 1);
	r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
	a1 = _mm_add_epi16(p1, p7); // p1 + p7
	a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
	a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)

	// -p1 + p7 + p5 + (p5 >> 1);
	r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
	a2 = _mm_sub_epi16(p7, p1); // p7 - p1
	a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
	a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)

	// p3 + p5 + p1 + (p1 >> 1);
	a3 = _mm_add_epi16(p3, p5); // p3+p5
	a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
	p1 = _mm_srai_epi16(p1, 1); // p1 >> 1
	a3 = _mm_add_epi16(a3, p1); //p3 + p5 + p1 + (p1 >> 1)

	r0 = _mm_srai_epi16(a3, 2); // a3>>2
	b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
	r0 = _mm_srai_epi16(a2, 2); // a2>>2
	b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
	a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
	b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
	a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
	b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);

	p0 = _mm_add_epi16(b0, b7); // b0 + b7;
	p1 = _mm_sub_epi16(b2, b5); // b2 - b5;
	p2 = _mm_add_epi16(b4, b3); // b4 + b3;
	p3 = _mm_add_epi16(b6, b1); // b6 + b1;
	p4 = _mm_sub_epi16(b6, b1); // b6 - b1;
	p5 = _mm_sub_epi16(b4, b3); // b4 - b3;
	p6 = _mm_add_epi16(b2, b5); // b2 + b5;
	p7 = _mm_sub_epi16(b0, b7); // b0 - b7;

	/* rotate 8x8 (ugh) */
	// (second transpose, so the next butterfly runs along the other dimension)
	r0 = _mm_unpacklo_epi16(p0, p2);
	r1 = _mm_unpacklo_epi16(p1, p3);
	r2 = _mm_unpackhi_epi16(p0, p2);
	r3 = _mm_unpackhi_epi16(p1, p3);
	r4 = _mm_unpacklo_epi16(p4, p6);
	r5 = _mm_unpacklo_epi16(p5, p7);
	r6 = _mm_unpackhi_epi16(p4, p6);
	r7 = _mm_unpackhi_epi16(p5, p7);

	b0 = _mm_unpacklo_epi16(r0, r1);
	b1 = _mm_unpackhi_epi16(r0, r1);
	b2 = _mm_unpacklo_epi16(r2, r3);
	b3 = _mm_unpackhi_epi16(r2, r3);
	b4 = _mm_unpacklo_epi16(r4, r5);
	b5 = _mm_unpackhi_epi16(r4, r5);
	b6 = _mm_unpacklo_epi16(r6, r7);
	b7 = _mm_unpackhi_epi16(r6, r7);

	p0 = _mm_unpacklo_epi64(b0, b4);
	p1 = _mm_unpackhi_epi64(b0, b4);
	p2 = _mm_unpacklo_epi64(b1, b5);
	p3 = _mm_unpackhi_epi64(b1, b5);
	p4 = _mm_unpacklo_epi64(b2, b6);
	p5 = _mm_unpackhi_epi64(b2, b6);
	p6 = _mm_unpacklo_epi64(b3, b7);
	p7 = _mm_unpackhi_epi64(b3, b7);


	/* Vertical */

	a0 = _mm_add_epi16(p0, p4); // p0 + p4
	a1 = _mm_sub_epi16(p0, p4); // p0 - p4
	r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
	a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
	r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
	a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)

	b0 = _mm_add_epi16(a0, a3); // a0 + a3;
	b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
	b4 = _mm_add_epi16(a1, a2); // a1 + a2;
	b6 = _mm_sub_epi16(a0, a3); // a0 - a3;

	//-p3 + p5 - p7 - (p7 >> 1);
	r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
	a0 = _mm_sub_epi16(p5, p3); // p5 - p3
	a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
	a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)

	//p1 + p7 - p3 - (p3 >> 1);
	r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
	a1 = _mm_add_epi16(p1, p7); // p1 + p7
	a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
	a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)

	// -p1 + p7 + p5 + (p5 >> 1);
	r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
	a2 = _mm_sub_epi16(p7, p1); // p7 - p1
	a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
	a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)

	// p3 + p5 + p1 + (p1 >> 1);
	r0 = _mm_srai_epi16(p1, 1); // p1 >> 1
	a3 = _mm_add_epi16(p3, p5); // p3+p5
	a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
	a3 = _mm_add_epi16(a3, r0); //p3 + p5 + p1 + (p1 >> 1)

	r0 = _mm_srai_epi16(a3, 2); // a3>>2
	b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
	r0 = _mm_srai_epi16(a2, 2); // a2>>2
	b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
	a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
	b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
	a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
	b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);

	// final butterfly; each result row is written straight back into block
	r0 = _mm_add_epi16(b0, b7); // b0 + b7;
	_mm_store_si128((__m128i *)(block[0]), r0);
	r1 = _mm_sub_epi16(b2, b5); // b2 - b5;
	_mm_store_si128((__m128i *)(block[1]), r1);
	r2 = _mm_add_epi16(b4, b3); // b4 + b3;
	_mm_store_si128((__m128i *)(block[2]), r2);
	r3 = _mm_add_epi16(b6, b1); // b6 + b1;
	_mm_store_si128((__m128i *)(block[3]), r3);
	r4 = _mm_sub_epi16(b6, b1); // b6 - b1;
	_mm_store_si128((__m128i *)(block[4]), r4);
	r5 = _mm_sub_epi16(b4, b3); // b4 - b3;
	_mm_store_si128((__m128i *)(block[5]), r5);
	r6 = _mm_add_epi16(b2, b5); // b2 + b5;
	_mm_store_si128((__m128i *)(block[6]), r6);
	r7 = _mm_sub_epi16(b0, b7); // b0 - b7;
	_mm_store_si128((__m128i *)(block[7]), r7);
}

/*
 * Scalar in-place 8x8 inverse transform.
 *
 * block: BLOCK_SIZE_8x8 rows of eight 16-bit values.
 *
 * The first loop applies the 1-D butterfly to every row, the second loop
 * applies it to every column. Intermediates are computed in int and stored
 * back into the 16-bit block between the two passes.
 */
static void inverse8x8(h264_short_8x8block_row_t *block)
{
	int i;

	//int tmp[64];
	//int *pTmp = tmp;
	int a0, a1, a2, a3;
	int p0, p1, p2, p3, p4, p5 ,p6, p7;
	int b0, b1, b2, b3, b4, b5, b6, b7;

	// Horizontal
	for (i=0; i < BLOCK_SIZE_8x8; i++)
	{
		p0 = block[i][0];
		p1 = block[i][1];
		p2 = block[i][2];
		p3 = block[i][3];
		p4 = block[i][4];
		p5 = block[i][5];
		p6 = block[i][6];
		p7 = block[i][7];

		// even part
		a0 = p0 + p4;
		a1 = p0 - p4;
		a2 = p6 - (p2 >> 1);
		a3 = p2 + (p6 >> 1);

		b0 = a0 + a3;
		b2 = a1 - a2;
		b4 = a1 + a2;
		b6 = a0 - a3;

		// odd part
		a0 = p5 - p3 - p7 - (p7 >> 1);
		a1 = p1 + p7 - p3 - (p3 >> 1);
		a2 = p7 - p1 + p5 + (p5 >> 1);
		a3 = p3 + p5 + p1 + (p1 >> 1);


		b1 = a0 + (a3>>2);
		b3 = a1 + (a2>>2);
		b5 = a2 - (a1>>2);
		b7 = a3 - (a0>>2);

		block[i][0] = b0 + b7;
		block[i][1] = b2 - b5;
		block[i][2] = b4 + b3;
		block[i][3] = b6 + b1;
		block[i][4] = b6 - b1;
		block[i][5] = b4 - b3;
		block[i][6] = b2 + b5;
		block[i][7] = b0 - b7;
	}

	// Vertical
	for (i=0; i < BLOCK_SIZE_8x8; i++)
	{
		// pTmp = tmp + i;
		p0 = block[0][i];
		p1 = block[1][i];
		p2 = block[2][i];
		p3 = block[3][i];
		p4 = block[4][i];
		p5 = block[5][i];
		p6 = block[6][i];
		p7 = block[7][i];

		a0 = p0 + p4;
		a1 = p0 - p4;
		a2 = p6 - (p2>>1);
		a3 = p2 + (p6>>1);

		b0 = a0 + a3;
		b2 = a1 - a2;
		b4 = a1 + a2;
		b6 = a0 - a3;

		a0 = -p3 + p5 - p7 - (p7 >> 1);
		a1 = p1 + p7 - p3 - (p3 >> 1);
		a2 = -p1 + p7 + p5 + (p5 >> 1);
		a3 = p3 + p5 + p1 + (p1 >> 1);


		b1 = a0 + (a3 >> 2);
		b7 = a3 - (a0 >> 2);
		b3 = a1 + (a2 >> 2);
		b5 = a2 - (a1 >> 2);

		block[0][i] = b0 + b7;
		block[1][i] = b2 - b5;
		block[2][i] = b4 + b3;
		block[3][i] = b6 + b1;
		block[4][i] = b6 - b1;
		block[5][i] = b4 - b3;
		block[6][i] = b2 + b5;
		block[7][i] = b0 - b7;
	}

}
// NOTE(review): _M_IX64 does not look like an MSVC predefined macro (x64 is
// _M_X64); as written this section is presumably only built when _DEBUG is
// defined. Confirm the intent before changing the guard.
#if defined(_DEBUG) || defined(_M_IX64)
/*
 * SSE2 inverse 8x8 transform fused with sample reconstruction.
 *
 * mb_rec:  destination rows; 8 bytes are written at mb_rec[0..7][pos_x]
 * mb_pred: prediction rows; 8 bytes are read at mb_pred[0..7][pos_x]
 * block:   8x8 16-bit coefficients, read with aligned 128-bit loads and
 *          not written back (the transform result stays in registers)
 * pos_x:   horizontal offset applied to both mb_rec and mb_pred
 *
 * After the two transform passes each residual row is rounded with
 * (x + 32) >> 6, added to the zero-extended prediction with signed
 * saturation and packed to unsigned bytes with saturation.
 */
void itrans8x8_sse2(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x)
{
	__m128i a0, a1, a2, a3;
	__m128i p0, p1, p2, p3, p4, p5 ,p6, p7;
	__m128i b0, b1, b2, b3, b4, b5, b6, b7;
	__m128i r0, r1, r2, r3, r4, r5, r6, r7;
	__m128i const32, zero;
	__declspec(align(32)) static const int16_t c32[8] = {32, 32, 32, 32, 32, 32, 32, 32};
	__m128i pred0, pred1;

	const32 = _mm_load_si128((const __m128i *)c32);
	zero = _mm_setzero_si128();

	// Horizontal
	b0 = _mm_load_si128((__m128i *)(block[0]));
	b1 = _mm_load_si128((__m128i *)(block[1]));
	b2 = _mm_load_si128((__m128i *)(block[2]));
	b3 = _mm_load_si128((__m128i *)(block[3]));
	b4 = _mm_load_si128((__m128i *)(block[4]));
	b5 = _mm_load_si128((__m128i *)(block[5]));
	b6 = _mm_load_si128((__m128i *)(block[6]));
	b7 = _mm_load_si128((__m128i *)(block[7]));

	/* rotate 8x8 (ugh) */
	// (transpose, same unpack sequence as inverse8x8_sse2)
	r0 = _mm_unpacklo_epi16(b0, b2);
	r1 = _mm_unpacklo_epi16(b1, b3);
	r2 = _mm_unpackhi_epi16(b0, b2);
	r3 = _mm_unpackhi_epi16(b1, b3);
	r4 = _mm_unpacklo_epi16(b4, b6);
	r5 = _mm_unpacklo_epi16(b5, b7);
	r6 = _mm_unpackhi_epi16(b4, b6);
	r7 = _mm_unpackhi_epi16(b5, b7);

	b0 = _mm_unpacklo_epi16(r0, r1);
	b1 = _mm_unpackhi_epi16(r0, r1);
	b2 = _mm_unpacklo_epi16(r2, r3);
	b3 = _mm_unpackhi_epi16(r2, r3);
	b4 = _mm_unpacklo_epi16(r4, r5);
	b5 = _mm_unpackhi_epi16(r4, r5);
	b6 = _mm_unpacklo_epi16(r6, r7);
	b7 = _mm_unpackhi_epi16(r6, r7);

	p0 = _mm_unpacklo_epi64(b0, b4);
	p1 = _mm_unpackhi_epi64(b0, b4);
	p2 = _mm_unpacklo_epi64(b1, b5);
	p3 = _mm_unpackhi_epi64(b1, b5);
	p4 = _mm_unpacklo_epi64(b2, b6);
	p5 = _mm_unpackhi_epi64(b2, b6);
	p6 = _mm_unpacklo_epi64(b3, b7);
	p7 = _mm_unpackhi_epi64(b3, b7);

	/* perform approx DCT */
	a0 = _mm_add_epi16(p0, p4); // p0 + p4
	a1 = _mm_sub_epi16(p0, p4); // p0 - p4
	r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
	a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
	r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
	a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)

	b0 = _mm_add_epi16(a0, a3); // a0 + a3;
	b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
	b4 = _mm_add_epi16(a1, a2); // a1 + a2;
	b6 = _mm_sub_epi16(a0, a3); // a0 - a3;

	//-p3 + p5 - p7 - (p7 >> 1);
	r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
	a0 = _mm_sub_epi16(p5, p3); // p5 - p3
	a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
	a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)

	//p1 + p7 - p3 - (p3 >> 1);
	r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
	a1 = _mm_add_epi16(p1, p7); // p1 + p7
	a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
	a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)

	// -p1 + p7 + p5 + (p5 >> 1);
	r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
	a2 = _mm_sub_epi16(p7, p1); // p7 - p1
	a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
	a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)

	// p3 + p5 + p1 + (p1 >> 1);
	a3 = _mm_add_epi16(p3, p5); // p3+p5
	a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
	p1 = _mm_srai_epi16(p1, 1); // p1 >> 1
	a3 = _mm_add_epi16(a3, p1); //p3 + p5 + p1 + (p1 >> 1)

	r0 = _mm_srai_epi16(a3, 2); // a3>>2
	b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
	r0 = _mm_srai_epi16(a2, 2); // a2>>2
	b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
	a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
	b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
	a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
	b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);

	p0 = _mm_add_epi16(b0, b7); // b0 + b7;
	p1 = _mm_sub_epi16(b2, b5); // b2 - b5;
	p2 = _mm_add_epi16(b4, b3); // b4 + b3;
	p3 = _mm_add_epi16(b6, b1); // b6 + b1;
	p4 = _mm_sub_epi16(b6, b1); // b6 - b1;
	p5 = _mm_sub_epi16(b4, b3); // b4 - b3;
	p6 = _mm_add_epi16(b2, b5); // b2 + b5;
	p7 = _mm_sub_epi16(b0, b7); // b0 - b7;

	/* rotate 8x8 (ugh) */
	r0 = _mm_unpacklo_epi16(p0, p2);
	r1 = _mm_unpacklo_epi16(p1, p3);
	r2 = _mm_unpackhi_epi16(p0, p2);
	r3 = _mm_unpackhi_epi16(p1, p3);
	r4 = _mm_unpacklo_epi16(p4, p6);
	r5 = _mm_unpacklo_epi16(p5, p7);
	r6 = _mm_unpackhi_epi16(p4, p6);
	r7 = _mm_unpackhi_epi16(p5, p7);

	b0 = _mm_unpacklo_epi16(r0, r1);
	b1 = _mm_unpackhi_epi16(r0, r1);
	b2 = _mm_unpacklo_epi16(r2, r3);
	b3 = _mm_unpackhi_epi16(r2, r3);
	b4 = _mm_unpacklo_epi16(r4, r5);
	b5 = _mm_unpackhi_epi16(r4, r5);
	b6 = _mm_unpacklo_epi16(r6, r7);
	b7 = _mm_unpackhi_epi16(r6, r7);

	p0 = _mm_unpacklo_epi64(b0, b4);
	p1 = _mm_unpackhi_epi64(b0, b4);
	p2 = _mm_unpacklo_epi64(b1, b5);
	p3 = _mm_unpackhi_epi64(b1, b5);
	p4 = _mm_unpacklo_epi64(b2, b6);
	p5 = _mm_unpackhi_epi64(b2, b6);
	p6 = _mm_unpacklo_epi64(b3, b7);
	p7 = _mm_unpackhi_epi64(b3, b7);


	/* Vertical */

	a0 = _mm_add_epi16(p0, p4); // p0 + p4
	a1 = _mm_sub_epi16(p0, p4); // p0 - p4
	r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
	a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
	r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
	a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)

	b0 = _mm_add_epi16(a0, a3); // a0 + a3;
	b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
	b4 = _mm_add_epi16(a1, a2); // a1 + a2;
	b6 = _mm_sub_epi16(a0, a3); // a0 - a3;

	//-p3 + p5 - p7 - (p7 >> 1);
	r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
	a0 = _mm_sub_epi16(p5, p3); // p5 - p3
	a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
	a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)

	//p1 + p7 - p3 - (p3 >> 1);
	r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
	a1 = _mm_add_epi16(p1, p7); // p1 + p7
	a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
	a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)

	// -p1 + p7 + p5 + (p5 >> 1);
	r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
	a2 = _mm_sub_epi16(p7, p1); // p7 - p1
	a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
	a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)

	// p3 + p5 + p1 + (p1 >> 1);
	r0 = _mm_srai_epi16(p1, 1); // p1 >> 1
	a3 = _mm_add_epi16(p3, p5); // p3+p5
	a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
	a3 = _mm_add_epi16(a3, r0); //p3 + p5 + p1 + (p1 >> 1)

	r0 = _mm_srai_epi16(a3, 2); // a3>>2
	b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
	r0 = _mm_srai_epi16(a2, 2); // a2>>2
	b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
	a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
	b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
	a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
	b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);

	r0 = _mm_add_epi16(b0, b7); // b0 + b7;
	r1 = _mm_sub_epi16(b2, b5); // b2 - b5;
	r2 = _mm_add_epi16(b4, b3); // b4 + b3;
	r3 = _mm_add_epi16(b6, b1); // b6 + b1;
	r4 = _mm_sub_epi16(b6, b1); // b6 - b1;
	r5 = _mm_sub_epi16(b4, b3); // b4 - b3;
	r6 = _mm_add_epi16(b2, b5); // b2 + b5;
	r7 = _mm_sub_epi16(b0, b7); // b0 - b7;


	// add in prediction values
	// (rows are handled in pairs so one pack produces two 8-pixel rows)
	pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[0][pos_x]));
	pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[1][pos_x]));
	// (x + 32) >> 6
	r0 = _mm_adds_epi16(r0, const32);
	r0 = _mm_srai_epi16(r0, 6);
	r1 = _mm_adds_epi16(r1, const32);
	r1 = _mm_srai_epi16(r1, 6);
	pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
	pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
	pred0 = _mm_adds_epi16(pred0, r0);
	pred1 = _mm_adds_epi16(pred1, r1);

	pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char

	// store
	_mm_storel_epi64((__m128i *)(&mb_rec[0][pos_x]), pred0);
	// TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
	pred0 = _mm_srli_si128(pred0, 8);
	_mm_storel_epi64((__m128i *)(&mb_rec[1][pos_x]), pred0);

	/* --- */

	pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[2][pos_x]));
	pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[3][pos_x]));
	// (x + 32) >> 6
	r2 = _mm_adds_epi16(r2, const32);
	r2 = _mm_srai_epi16(r2, 6);
	r3 = _mm_adds_epi16(r3, const32);
	r3 = _mm_srai_epi16(r3, 6);
	pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
	pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
	pred0 = _mm_adds_epi16(pred0, r2);
	pred1 = _mm_adds_epi16(pred1, r3);

	pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char

	// store
	_mm_storel_epi64((__m128i *)(&mb_rec[2][pos_x]), pred0);
	// TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
	pred0 = _mm_srli_si128(pred0, 8);
	_mm_storel_epi64((__m128i *)(&mb_rec[3][pos_x]), pred0);

	/* --- */

	pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[4][pos_x]));
	pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[5][pos_x]));
	// (x + 32) >> 6
	r4 = _mm_adds_epi16(r4, const32);
	r4 = _mm_srai_epi16(r4, 6);
	r5 = _mm_adds_epi16(r5, const32);
	r5 = _mm_srai_epi16(r5, 6);
	pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
	pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
	pred0 = _mm_adds_epi16(pred0, r4);
	pred1 = _mm_adds_epi16(pred1, r5);

	pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char

	// store
	_mm_storel_epi64((__m128i *)(&mb_rec[4][pos_x]), pred0);
	// TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
	pred0 = _mm_srli_si128(pred0, 8);
	_mm_storel_epi64((__m128i *)(&mb_rec[5][pos_x]), pred0);

	/* --- */

	pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[6][pos_x]));
	pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[7][pos_x]));
	// (x + 32) >> 6
	r6 = _mm_adds_epi16(r6, const32);
	r6 = _mm_srai_epi16(r6, 6);
	r7 = _mm_adds_epi16(r7, const32);
	r7 = _mm_srai_epi16(r7, 6);
	pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
	pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
	pred0 = _mm_adds_epi16(pred0, r6);
	pred1 = _mm_adds_epi16(pred1, r7);

	pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char

	// store
	_mm_storel_epi64((__m128i *)&mb_rec[6][pos_x], pred0);
	// TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
	pred0 = _mm_srli_si128(pred0, 8);
	_mm_storel_epi64((__m128i *)&mb_rec[7][pos_x], pred0);
}

#endif

#ifdef _M_IX86
// TODO!!
fix for 16bit coefficients instead of 32 +static void sample_reconstruct8x8_mmx(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *mb_rres8, int pos_x) +{ + __asm + { + mov esi, 8 // loop 8 times + + mov eax, mb_rec + add eax, pos_x + + mov ebx, mb_pred + add ebx, pos_x + + mov ecx, mb_rres8 + + // mm0 : constant value 32 + mov edx, 0x00200020 + movd mm0, edx + punpckldq mm0, mm0 + // mm5: zero + pxor mm7, mm7 + +loop8: + + movq mm1, MMWORD PTR 0[ecx] + paddw mm1, mm0 // rres + 32 + psraw mm1, 6 // (rres + 32) >> 6 + movq mm2, MMWORD PTR 0[ebx] + punpcklbw mm2, mm7 // convert pred_row from unsigned char to short + paddsw mm2, mm1 // pred_row + rres_row + packuswb mm2, mm7 + movq MMWORD PTR 0[eax], mm2 + + + add eax, 16 + add ebx, 16 + add ecx, 16 + + sub esi, 1 + jne loop8 + emms + } +} +#endif + +// benski> unused, left in place for unit testing and if we ever need to port the decoder to non-intel +static void sample_reconstruct8x8(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *mb_rres8, int pos_x, int max_imgpel_value) +{ + int i,j; + for( j = 0; j < 8; j++) + { + imgpel *rec_row = mb_rec[j] + pos_x; + const short *rres_row = mb_rres8[j]; + const imgpel *pred_row = mb_pred[j] + pos_x; + + for( i = 0; i < 8; i++) + rec_row[i] = (imgpel) iClip1(max_imgpel_value, pred_row[i] + rshift_rnd_sf(rres_row[i], DQ_BITS_8)); + } +} +/*! 
+*********************************************************************** +* \brief +* Inverse 8x8 transformation +*********************************************************************** +*/ +#ifdef _M_IX86 +void itrans8x8_mmx(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x) +{ + inverse8x8((h264_short_8x8block_row_t *)block); + sample_reconstruct8x8_mmx(mb_rec, mb_pred, block, pos_x); +} +#endif + +void itrans8x8_c(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x) +{ + inverse8x8((h264_short_8x8block_row_t *)block); + sample_reconstruct8x8(mb_rec, mb_pred, block, pos_x, 255); +} + +void itrans8x8_lossless(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x) +{ + int i,j; + + for( j = 0; j < 8; j++) + { + imgpel *rec_row = mb_rec[j] + pos_x; + const short *rres_row = block[j]; + const imgpel *pred_row = mb_pred[j] + pos_x; + for( i = 0; i < 8; i++) + rec_row[i] = (imgpel) iClip1(255, (rres_row[i] + (long)pred_row[i])); + } +}
\ No newline at end of file diff --git a/Src/h264dec/ldecod/src/vlc.c b/Src/h264dec/ldecod/src/vlc.c new file mode 100644 index 00000000..397a7d08 --- /dev/null +++ b/Src/h264dec/ldecod/src/vlc.c @@ -0,0 +1,1769 @@ +/*! +************************************************************************ +* \file vlc.c +* +* \brief +* VLC support functions +* +* \author +* Main contributors (see contributors.h for copyright, address and affiliation details) +* - Inge Lille-Langøy <inge.lille-langoy@telenor.com> +* - Detlev Marpe <marpe@hhi.de> +* - Gabi Blaettermann +************************************************************************ +*/ +#include "contributors.h" + +#include "global.h" +#include "vlc.h" +#include "elements.h" +#include "optim.h" +#include <emmintrin.h> + +// A little trick to avoid those horrible #if TRACE all over the source code +#if TRACE +#define SYMTRACESTRING(s) strncpy(symbol.tracestring,s,TRACESTRING_SIZE) +#else +#define SYMTRACESTRING(s) // do nothing +#endif + +static int ShowBits (const uint8_t buffer[],int totbitoffset,int bitcount, int numbits); + +// Note that all NA values are filled with 0 + +/*! +************************************************************************************* +* \brief +* ue_v, reads an ue(v) syntax element, the length in bits is stored in +* the global p_Dec->UsedBits variable +* +* \param tracestring +* the string for the trace file +* +* \param bitstream +* the stream to be read from +* +* \return +* the value of the coded syntax element +* +************************************************************************************* +*/ +int ue_v (const char *tracestring, Bitstream *bitstream) +{ + SyntaxElement symbol; + + //assert (bitstream->streamBuffer != NULL); + symbol.mapping = linfo_ue; // Mapping rule + SYMTRACESTRING(tracestring); + readSyntaxElement_VLC (&symbol, bitstream); + return symbol.value1; +} + + +/*! 
+************************************************************************************* +* \brief +* ue_v, reads an se(v) syntax element, the length in bits is stored in +* the global p_Dec->UsedBits variable +* +* \param tracestring +* the string for the trace file +* +* \param bitstream +* the stream to be read from +* +* \return +* the value of the coded syntax element +* +************************************************************************************* +*/ +int se_v (const char *tracestring, Bitstream *bitstream) +{ + SyntaxElement symbol; + + //assert (bitstream->streamBuffer != NULL); + symbol.mapping = linfo_se; // Mapping rule: signed integer + SYMTRACESTRING(tracestring); + readSyntaxElement_VLC (&symbol, bitstream); + return symbol.value1; +} + + +/*! +************************************************************************************* +* \brief +* ue_v, reads an u(v) syntax element, the length in bits is stored in +* the global p_Dec->UsedBits variable +* +* \param LenInBits +* length of the syntax element +* +* \param tracestring +* the string for the trace file +* +* \param bitstream +* the stream to be read from +* +* \return +* the value of the coded syntax element +* +************************************************************************************* +*/ +int u_v (int LenInBits, const char*tracestring, Bitstream *bitstream) +{ + return readSyntaxElement_FLC(bitstream, LenInBits); +} + +/*! 
+************************************************************************************* +* \brief +* i_v, reads an i(v) syntax element, the length in bits is stored in +* the global p_Dec->UsedBits variable +* +* \param LenInBits +* length of the syntax element +* +* \param tracestring +* the string for the trace file +* +* \param bitstream +* the stream to be read from +* +* \return +* the value of the coded syntax element +* +************************************************************************************* +*/ +int i_v (int LenInBits, const char*tracestring, Bitstream *bitstream) +{ + int val; + val = readSyntaxElement_FLC (bitstream, LenInBits); + + // can be negative + val = -( val & (1 << (LenInBits - 1)) ) | val; + + return val; +} + + +/*! +************************************************************************************* +* \brief +* ue_v, reads an u(1) syntax element, the length in bits is stored in +* the global p_Dec->UsedBits variable +* +* \param tracestring +* the string for the trace file +* +* \param bitstream +* the stream to be read from +* +* \return +* the value of the coded syntax element +* +************************************************************************************* +*/ +Boolean u_1 (const char *tracestring, Bitstream *bitstream) +{ + return (Boolean) u_v (1, tracestring, bitstream); +} + + + +/*! +************************************************************************ +* \brief +* mapping rule for ue(v) syntax elements +* \par Input: +* lenght and info +* \par Output: +* number in the code table +************************************************************************ +*/ +void linfo_ue(int len, int info, int *value1, int *dummy) +{ + //assert ((len >> 1) < 32); + *value1 = (int) (((unsigned int) 1 << (len >> 1)) + (unsigned int) (info) - 1); +} + +/*! 
+************************************************************************ +* \brief +* mapping rule for se(v) syntax elements +* \par Input: +* lenght and info +* \par Output: +* signed mvd +************************************************************************ +*/ +void linfo_se(int len, int info, int *value1, int *dummy) +{ + //assert ((len >> 1) < 32); + unsigned int n = ((unsigned int) 1 << (len >> 1)) + (unsigned int) info - 1; + *value1 = (n + 1) >> 1; + if((n & 0x01) == 0) // lsb is signed bit + *value1 = -*value1; +} + + +/*! +************************************************************************ +* \par Input: +* length and info +* \par Output: +* cbp (intra) +************************************************************************ +*/ +void linfo_cbp_intra_normal(int len,int info,int *cbp, int *dummy) +{ + int cbp_idx; + + linfo_ue(len, info, &cbp_idx, dummy); + *cbp=NCBP[1][cbp_idx][0]; +} + + +/*! +************************************************************************ +* \par Input: +* length and info +* \par Output: +* cbp (intra) +************************************************************************ +*/ +void linfo_cbp_intra_other(int len,int info,int *cbp, int *dummy) +{ + int cbp_idx; + + linfo_ue(len, info, &cbp_idx, dummy); + *cbp=NCBP[0][cbp_idx][0]; +} + +/*! +************************************************************************ +* \par Input: +* length and info +* \par Output: +* cbp (inter) +************************************************************************ +*/ +void linfo_cbp_inter_normal(int len,int info,int *cbp, int *dummy) +{ + int cbp_idx; + + linfo_ue(len, info, &cbp_idx, dummy); + *cbp=NCBP[1][cbp_idx][1]; +} + +/*! 
+************************************************************************ +* \par Input: +* length and info +* \par Output: +* cbp (inter) +************************************************************************ +*/ +void linfo_cbp_inter_other(int len,int info,int *cbp, int *dummy) +{ + int cbp_idx; + + linfo_ue(len, info, &cbp_idx, dummy); + *cbp=NCBP[0][cbp_idx][1]; +} + +/*! +************************************************************************ +* \par Input: +* length and info +* \par Output: +* level, run +************************************************************************ +*/ +void linfo_levrun_inter(int len, int info, int *level, int *irun) +{ + //assert (((len >> 1) - 5) < 32); + + if (len <= 9) + { + int l2 = imax(0,(len >> 1)-1); + int inf = info >> 1; + + *level = NTAB1[l2][inf][0]; + *irun = NTAB1[l2][inf][1]; + if ((info & 0x01) == 1) + *level = -*level; // make sign + } + else // if len > 9, skip using the array + { + *irun = (info & 0x1e) >> 1; + *level = LEVRUN1[*irun] + (info >> 5) + ( 1 << ((len >> 1) - 5)); + if ((info & 0x01) == 1) + *level = -*level; + } + + if (len == 1) // EOB + *level = 0; +} + + +/*! +************************************************************************ +* \par Input: +* length and info +* \par Output: +* level, run +************************************************************************ +*/ +void linfo_levrun_c2x2(int len, int info, int *level, int *irun) +{ + if (len<=5) + { + int l2 = imax(0, (len >> 1) - 1); + int inf = info >> 1; + *level = NTAB3[l2][inf][0]; + *irun = NTAB3[l2][inf][1]; + if ((info & 0x01) == 1) + *level = -*level; // make sign + } + else // if len > 5, skip using the array + { + *irun = (info & 0x06) >> 1; + *level = LEVRUN3[*irun] + (info >> 3) + (1 << ((len >> 1) - 3)); + if ((info & 0x01) == 1) + *level = -*level; + } + + if (len == 1) // EOB + *level = 0; +} + +/*! 
+************************************************************************ +* \brief +* read next UVLC codeword from UVLC-partition and +* map it to the corresponding syntax element +************************************************************************ +*/ +int readSyntaxElement_VLC(SyntaxElement *sym, Bitstream *currStream) +{ + + int info; + sym->len = GetVLCSymbol (currStream->streamBuffer, currStream->frame_bitoffset, &info, currStream->bitstream_length); + if (sym->len == -1) + return -1; + + currStream->frame_bitoffset += sym->len; + sym->mapping(sym->len, info, &(sym->value1), &(sym->value2)); + + return 1; +} + + +/*! +************************************************************************ +* \brief +* read next UVLC codeword from UVLC-partition and +* map it to the corresponding syntax element +************************************************************************ +*/ +int readSyntaxElement_UVLC(SyntaxElement *sym, struct datapartition *dp) +{ + return (readSyntaxElement_VLC(sym, dp->bitstream)); +} + +/*! +************************************************************************ +* \brief +* read next VLC codeword for 4x4 Intra Prediction Mode and +* map it to the corresponding Intra Prediction Direction +************************************************************************ +*/ +int readSyntaxElement_Intra4x4PredictionMode(SyntaxElement *sym, Bitstream *currStream) +{ + int info; + sym->len = GetVLCSymbol_IntraMode (currStream->streamBuffer, currStream->frame_bitoffset, &info, currStream->bitstream_length); + + if (sym->len == -1) + return -1; + + currStream->frame_bitoffset += sym->len; + sym->value1 = (sym->len == 1) ? 
-1 : info; + +#if TRACE + tracebits2(sym->tracestring, sym->len, sym->value1); +#endif + + return 1; +} + +int GetVLCSymbol_IntraMode (const uint8_t buffer[],int totbitoffset,int *info, int bytecount) +{ + int byteoffset = (totbitoffset >> 3); // byte from start of buffer + int bitoffset = (7 - (totbitoffset & 0x07)); // bit from start of byte + const uint8_t *cur_byte = &(buffer[byteoffset]); + int ctr_bit = (*cur_byte & (0x01 << bitoffset)); // control bit for current bit posision + + //First bit + if (ctr_bit) + { + *info = 0; + return 1; + } + + if (byteoffset >= bytecount) + { + return -1; + } + else + { + int inf = (*(cur_byte) << 8) + *(cur_byte + 1); + inf <<= (sizeof(uint8_t) * 8) - bitoffset; + inf = inf & 0xFFFF; + inf >>= (sizeof(uint8_t) * 8) * 2 - 3; + + *info = inf; + return 4; // return absolute offset in bit from start of frame + } +} + + +/*! +************************************************************************ +* \brief +* test if bit buffer contains only stop bit +* +* \param buffer +* buffer containing VLC-coded data bits +* \param totbitoffset +* bit offset from start of partition +* \param bytecount +* buffer length +* \return +* true if more bits available +************************************************************************ +*/ +int more_rbsp_data (const uint8_t buffer[],int totbitoffset,int bytecount) +{ + long byteoffset = (totbitoffset >> 3); // byte from start of buffer + // there is more until we're in the last byte + if (byteoffset < (bytecount - 1)) + return TRUE; + else + { + int bitoffset = (7 - (totbitoffset & 0x07)); // bit from start of byte + const uint8_t *cur_byte = &(buffer[byteoffset]); + // read one bit + int ctr_bit = ctr_bit = ((*cur_byte)>> (bitoffset--)) & 0x01; // control bit for current bit posision + + //assert (byteoffset<bytecount); + + // a stop bit has to be one + if (ctr_bit==0) + return TRUE; + else + { + int cnt = 0; + + while (bitoffset>=0 && !cnt) + { + cnt |= ((*cur_byte)>> (bitoffset--)) & 0x01; 
// set up control bit + } + + return (cnt); + } + } +} + + +/*! +************************************************************************ +* \brief +* Check if there are symbols for the next MB +************************************************************************ +*/ +int uvlc_startcode_follows(Slice *currSlice, int dummy) +{ + byte dp_Nr = assignSE2partition[currSlice->dp_mode][SE_MBTYPE]; + DataPartition *dP = &(currSlice->partArr[dp_Nr]); + Bitstream *currStream = dP->bitstream; + const uint8_t *buf = currStream->streamBuffer; + + return (!(more_rbsp_data(buf, currStream->frame_bitoffset,currStream->bitstream_length))); +} + + + +/*! +************************************************************************ +* \brief +* read one exp-golomb VLC symbol +* +* \param buffer +* containing VLC-coded data bits +* \param totbitoffset +* bit offset from start of partition +* \param info +* returns the value of the symbol +* \param bytecount +* buffer length +* \return +* bits read +************************************************************************ +*/ +int GetVLCSymbol (const uint8_t buffer[],int totbitoffset,int *info, int bytecount) +{ + long byteoffset = (totbitoffset >> 3); // byte from start of buffer + int bitoffset = (7 - (totbitoffset & 0x07)); // bit from start of byte + int bitcounter = 1; + int len = 0; + const uint8_t *cur_byte = &(buffer[byteoffset]); + int ctr_bit = ((*cur_byte) >> (bitoffset)) & 0x01; // control bit for current bit posision + + while (ctr_bit == 0) + { // find leading 1 bit + len++; + bitcounter++; + bitoffset--; + bitoffset &= 0x07; + cur_byte += (bitoffset == 7); + byteoffset+= (bitoffset == 7); + ctr_bit = ((*cur_byte) >> (bitoffset)) & 0x01; + } + + if (byteoffset + ((len + 7) >> 3) > bytecount) + return -1; + else + { + // make infoword + int inf = 0; // shortest possible code is 1, then info is always 0 + + while (len--) + { + bitoffset --; + bitoffset &= 0x07; + cur_byte += (bitoffset == 7); + bitcounter++; + inf <<= 1; + 
inf |= ((*cur_byte) >> (bitoffset)) & 0x01; + } + + *info = inf; + return bitcounter; // return absolute offset in bit from start of frame + } +} + + +/*! +************************************************************************ +* \brief +* Reads bits from the bitstream buffer (Threshold based) +* +* \param inf +* bytes to extract numbits from with bitoffset already applied +* \param numbits +* number of bits to read +* +************************************************************************ +*/ + +static inline int ShowBitsThres16(int inf, int numbits) +{ + return ((inf) >> ((sizeof(uint8_t) * 16) - (numbits))); +} + +//static inline int ShowBitsThres (int inf, int bitcount, int numbits) +static inline int ShowBitsThres(int inf, int numbits) +{ + return ((inf) >> ((sizeof(uint8_t) * 24) - (numbits))); + /* + if ((numbits + 7) > bitcount) + { + return -1; + } + else + { + //Worst case scenario is that we will need to traverse 3 bytes + inf >>= (sizeof(byte)*8)*3 - numbits; + } + + return inf; //Will be a small unsigned integer so will not need any conversion when returning as int + */ +} + + +/*! 
+************************************************************************ +* \brief +* code from bitstream (2d tables) +************************************************************************ +*/ + +static int code_from_bitstream_2d(SyntaxElement *sym, + Bitstream *currStream, + const uint8_t *lentab, + const uint8_t *codtab, + int tabwidth, + int tabheight, + int *code) +{ + int i, j; + const uint8_t *len = &lentab[0], *cod = &codtab[0]; + + int *frame_bitoffset = &currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[*frame_bitoffset >> 3]; + + //Apply bitoffset to three bytes (maximum that may be traversed by ShowBitsThres) + unsigned int inf = ((*buf) << 16) + (*(buf + 1) << 8) + *(buf + 2); //Even at the end of a stream we will still be pulling out of allocated memory as alloc is done by MAX_CODED_FRAME_SIZE + inf <<= (*frame_bitoffset & 0x07); //Offset is constant so apply before extracting different numbers of bits + inf &= 0xFFFFFF; //Arithmetic shift so wipe any sign which may be extended inside ShowBitsThres + + // this VLC decoding method is not optimized for speed + for (j = 0; j < tabheight; j++) + { + for (i = 0; i < tabwidth; i++) + { + if ((*len == 0) || (ShowBitsThres(inf, *len) != *cod)) + { + len++; + cod++; + } + else + { + sym->len = *len; + *frame_bitoffset += *len; // move bitstream pointer + *code = *cod; + sym->value1 = i; + sym->value2 = j; + return 0; // found code and return + } + } + } + return -1; // failed to find code +} + +static int code_from_bitstream_2d_16_1(Bitstream *currStream, + const uint8_t *lentab, + const uint8_t *codtab) +{ + int i; + const uint8_t *len = &lentab[0], *cod = &codtab[0]; + + int *frame_bitoffset = &currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[*frame_bitoffset >> 3]; + + //Apply bitoffset to three bytes (maximum that may be traversed by ShowBitsThres) + unsigned int inf = ((*buf) << 16) + (*(buf + 1) << 8) + *(buf + 2); //Even at the end of a stream 
we will still be pulling out of allocated memory as alloc is done by MAX_CODED_FRAME_SIZE + inf <<= (*frame_bitoffset & 0x07); //Offset is constant so apply before extracting different numbers of bits + inf &= 0xFFFFFF; //Arithmetic shift so wipe any sign which may be extended inside ShowBitsThres + + // this VLC decoding method is not optimized for speed + for (i = 0; i < 16 && len[i]; i++) + { + if (ShowBitsThres(inf, len[i]) == cod[i]) + { + *frame_bitoffset += len[i]; // move bitstream pointer + return i; // found code and return + } + } + + return -1; // failed to find code +} + +int code_from_bitstream_2d_16_1_sse2(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab) +{ + unsigned long result; + + int frame_bitoffset = currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3]; + uint16_t inf; + + __m128i xmm_inf, xmm_mask, xmm_cod; + int match; + unsigned int _inf = _byteswap_ulong(*(unsigned long *)buf); + _inf >>= 16-(frame_bitoffset & 0x07); + _inf &= 0xFFFF; + inf = (uint16_t)_inf; + + xmm_inf = _mm_set1_epi16(inf); + + xmm_cod = _mm_load_si128((__m128i *)codtab); + xmm_mask = _mm_load_si128((__m128i *)masktab); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += lentab[result]; // move bitstream pointer + return result; // found code and return + } + + xmm_cod = _mm_load_si128((__m128i *)(codtab+8)); + xmm_mask = _mm_load_si128((__m128i *)(masktab+8)); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += lentab[result+8]; // move 
bitstream pointer + return result+8; + } + + + return -1; +} + +int code_from_bitstream_2d_16_1_c(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab) +{ + int i; + + int frame_bitoffset = currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3]; + uint16_t inf; + + unsigned int _inf = _byteswap_ulong(*(unsigned long *)buf); + _inf >>= 16-(frame_bitoffset & 0x07); + _inf &= 0xFFFF; + inf = (uint16_t)_inf; + + // this VLC decoding method is not optimized for speed + for (i=0; i < 16; i++) + { + if ((inf & masktab[i]) == codtab[i])//ShowBitsThres(inf, len[i]) == cod[i]) + { + currStream->frame_bitoffset += lentab[i]; // move bitstream pointer + return i; // found code and return + } + } + + return -1; // failed to find code +} + +int code_from_bitstream_2d_17_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab) +{ + unsigned long result; + const uint16_t *len = lentab, *cod = codtab, *mask = masktab; + + int frame_bitoffset = currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3]; + uint16_t inf; + + __m128i xmm_inf, xmm_mask, xmm_cod; + int match; + unsigned int _inf = _byteswap_ulong(*(unsigned long *)buf); + _inf >>= 16-(frame_bitoffset & 0x07); + _inf &= 0xFFFF; + inf = (uint16_t)_inf; + + xmm_inf = _mm_set1_epi16(inf); + + xmm_cod = _mm_loadu_si128((__m128i *)cod); + xmm_mask = _mm_loadu_si128((__m128i *)mask); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result]; // move bitstream pointer + sym->value1 = result; + sym->value2 = 0; + return 0; // found code and return + } + + /* second table - rows 1-8 */ + xmm_cod = 
_mm_loadu_si128((__m128i *)(cod+17)); + xmm_mask = _mm_loadu_si128((__m128i *)(mask+17)); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result+17]; // move bitstream pointer + sym->value1 = 1+result; + sym->value2 = 1; + return 0; // found code and return + } + + /* first table, rows 9-16 */ + xmm_cod = _mm_loadu_si128((__m128i *)(cod+8)); + xmm_mask = _mm_loadu_si128((__m128i *)(mask+8)); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result+8]; // move bitstream pointer + sym->value1 = 8+result; + sym->value2 = 0; + return 0; // found code and return + } + + /* extra one just for first table */ + if ((inf & mask[16]) == cod[16])//ShowBitsThres(inf, len[i]) == cod[i]) + { + currStream->frame_bitoffset += len[16]; // move bitstream pointer + sym->value1 = 16; + sym->value2 = 0; + return 0; // found code and return + } + + + + /* second table - rows 9-16 */ + xmm_cod = _mm_loadu_si128((__m128i *)(cod+25)); + xmm_mask = _mm_loadu_si128((__m128i *)(mask+25)); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result+25]; // move bitstream pointer + sym->value1 = 9+result; + sym->value2 = 1; + return 0; // found code and return + } + + + /* third table - rows 1-8 */ + xmm_cod = _mm_loadu_si128((__m128i *)(cod+34)); + xmm_mask = _mm_loadu_si128((__m128i *)(mask+34)); + xmm_mask = 
_mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result+34]; // move bitstream pointer + sym->value1 = 2+result; + sym->value2 = 2; + return 0; // found code and return + } + + + /* third table - rows 9-16 */ + xmm_cod = _mm_loadu_si128((__m128i *)(cod+42)); + xmm_mask = _mm_loadu_si128((__m128i *)(mask+42)); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result+42]; // move bitstream pointer + sym->value1 = 10+result; + sym->value2 = 2; + return 0; // found code and return + } + + /* fourth table - rows 1-8 */ + xmm_cod = _mm_loadu_si128((__m128i *)(cod+51)); + xmm_mask = _mm_loadu_si128((__m128i *)(mask+51)); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result+51]; // move bitstream pointer + sym->value1 = 3+result; + sym->value2 = 3; + return 0; // found code and return + } + + /* fourth table - rows 9-16 */ + xmm_cod = _mm_loadu_si128((__m128i *)(cod+59)); + xmm_mask = _mm_loadu_si128((__m128i *)(mask+59)); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + _BitScanForward(&result, match); + result >>= 1; + + currStream->frame_bitoffset += len[result+59]; // move bitstream pointer + sym->value1 = 11+result; + sym->value2 = 3; + return 0; // found code and 
return + } + + return -1; // failed to find code +} + + +int code_from_bitstream_2d_17_4_c(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab) +{ + int i, j; + const uint16_t *len, *cod, *mask; + + int frame_bitoffset = currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3]; + uint16_t inf; + + unsigned int _inf = _byteswap_ulong(*(unsigned long *)buf); + _inf >>= 16-(frame_bitoffset & 0x07); + _inf &= 0xFFFF; + inf = (uint16_t)_inf; + + for (j=0;j<4;j++) + { + len = &lentab[j*17]; + cod = &codtab[j*17]; + mask = &masktab[j*17]; + // this VLC decoding method is not optimized for speed + for (i=0; i < 17; i++) + { + if ((inf & mask[i]) == cod[i])//ShowBitsThres(inf, len[i]) == cod[i]) + { + currStream->frame_bitoffset += len[i]; // move bitstream pointer + sym->value1 = j+i; + sym->value2 = j; + return 0; // found code and return + } + } + } + + return -1; // failed to find code +} + +static int code_from_bitstream_2d_9_4(SyntaxElement *sym, + Bitstream *currStream, + const uint16_t *lentab, + const uint16_t *codtab, + const uint16_t *masktab) +{ + int i, j; + const uint16_t *len, *cod, *mask; + + int frame_bitoffset = currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3]; + + uint16_t inf; + unsigned int _inf = _byteswap_ulong(*(unsigned long *)buf); + _inf >>= 16-(frame_bitoffset & 0x07); + _inf &= 0xFFFF; + inf = (uint16_t)_inf; + + // this VLC decoding method is not optimized for speed + for (j = 0; j < 4; j++) + { + len = &lentab[j*9]; + cod = &codtab[j*9]; + mask = &masktab[j*9]; + + for (i=0; i < 9; i++) + { + if ((inf & mask[i]) == cod[i]) + { + sym->len = len[i]; + currStream->frame_bitoffset += len[i]; // move bitstream pointer + sym->value1 = j+i; + sym->value2 = j; + return 0; // found code and return + } + } + } + return -1; // failed to find code +} + +int code_from_bitstream_2d_5_4_c(SyntaxElement 
*sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab) +{ + + int i; + int frame_bitoffset = currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3]; + uint8_t inf; + + unsigned int _inf = _byteswap_ushort(*(unsigned short *)buf); + _inf >>= 8-(frame_bitoffset & 0x07); + _inf &= 0xFF; + inf = (uint8_t)_inf; + + for (i = 0; i<16;i++) + { + if ((inf & masktab[i]) == codtab[i]) + { + currStream->frame_bitoffset += lentab[i]; // move bitstream pointer + sym->value2 = (i<<1)/9; + sym->value1 = sym->value2 + (((i<<1)%9)>>1); + + return 0; // found code and return + } + } + + return -1; // failed to find code + +} + + +int code_from_bitstream_2d_5_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab) +{ + int frame_bitoffset = currStream->frame_bitoffset; + const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3]; + uint8_t inf; + __m128i xmm_inf, xmm_mask, xmm_cod; + int match; + unsigned int _inf = _byteswap_ushort(*(unsigned short *)buf); + _inf >>= 8-(frame_bitoffset & 0x07); + _inf &= 0xFF; + inf = (uint8_t)_inf; + + xmm_inf = _mm_set1_epi8(_inf); + + xmm_cod = _mm_load_si128((__m128i *)codtab); + xmm_mask = _mm_load_si128((__m128i *)masktab); + xmm_mask = _mm_and_si128(xmm_mask, xmm_inf); // mask = mask & inf + xmm_mask = _mm_cmpeq_epi8(xmm_mask, xmm_cod); // mask == cod + match = _mm_movemask_epi8(xmm_mask); + if (match) + { + unsigned long result; + _BitScanForward(&result, match); + + currStream->frame_bitoffset += lentab[result]; // move bitstream pointer + sym->value2 = (result<<1)/9; + sym->value1 = sym->value2 + (((result<<1)%9)>>1); + return 0; // found code and return + } + return -1; +} + +/*! 
+************************************************************************ +* \brief +* read FLC codeword from UVLC-partition +************************************************************************ +*/ +int readSyntaxElement_FLC(Bitstream *currStream, int numbits) +{ + int totbitoffset = currStream->frame_bitoffset; + int bitoffset = /*7 - */(totbitoffset & 0x07); // bit from start of byte + int byteoffset = (totbitoffset >> 3); // byte from start of buffer + const uint8_t *ptr = &(currStream->streamBuffer[byteoffset]); + + uint32_t tmp = (ptr[0] << 24) | (ptr[1] << 16) | (ptr[2] << 8);// | (ptr[3]); + tmp <<= bitoffset; + tmp >>= 32 - numbits; + currStream->frame_bitoffset += numbits; + return tmp; +} + + + +/*! +************************************************************************ +* \brief +* read NumCoeff/TrailingOnes codeword from UVLC-partition +************************************************************************ +*/ + +int readSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *sym, + Bitstream *currStream, + int vlcnum) +{ + int frame_bitoffset = currStream->frame_bitoffset; + int BitstreamLengthInBytes = currStream->bitstream_length; + int BitstreamLengthInBits = (BitstreamLengthInBytes << 3) + 7; + const uint8_t *buf = currStream->streamBuffer; + + static const uint16_t lentab[3][4][17] = + { + { // 0702 + { 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16}, + { 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16, 0}, + { 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16, 0, 0}, + { 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16, 0, 0, 0}, + }, + { + { 2, 6, 6, 7, 8, 8, 9,11,11,12,12,12,13,13,13,14,14}, + { 2, 5, 6, 6, 7, 8, 9,11,11,12,12,13,13,14,14,14, 0}, + { 3, 6, 6, 7, 8, 9,11,11,12,12,13,13,13,14,14, 0, 0}, + { 4, 4, 5, 6, 6, 7, 9,11,11,12,13,13,13,14, 0, 0, 0}, + }, + { + { 4, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,10}, + { 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9,10,10,10, 0}, + { 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,10, 0, 0}, + { 4, 4, 4, 4, 4, 5, 
6, 7, 8, 8, 9,10,10,10, 0, 0, 0}, + }, + }; +#if 0 // save for reference + static const uint32_t codtab[3][4][17] = + { + { + { 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7,4}, + { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10,6}, + { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9,5}, + { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12,8}, + }, + { + { 3,11, 7, 7, 7, 4, 7,15,11,15,11, 8,15,11, 7, 9,7}, + { 0, 2, 7,10, 6, 6, 6, 6,14,10,14,10,14,10,11, 8,6}, + { 0, 0, 3, 9, 5, 5, 5, 5,13, 9,13, 9,13, 9, 6,10,5}, + { 0, 0, 0, 5, 4, 6, 8, 4, 4, 4,12, 8,12,12, 8, 1,4}, + }, + { + {15,15,11, 8,15,11, 9, 8,15,11,15,11, 8,13, 9, 5,1}, + { 0,14,15,12,10, 8,14,10,14,14,10,14,10, 7,12, 8,4}, + { 0, 0,13,14,11, 9,13, 9,13,10,13, 9,13, 9,11, 7,3}, + { 0, 0, 0,12,11,10, 9, 8,13,12,12,12, 8,12,10, 6,2}, + }, + }; +#endif + static const uint16_t codtab[3][4][17] = + { + { + { 0x8000, 0x1400, 0x0700, 0x0380, 0x01C0, 0x00E0, 0x0078, 0x0058, 0x0040, 0x003C, 0x002C, 0x001E, 0x0016, 0x000F, 0x000B, 0x0007, 0x0004 }, + { 0x4000, 0x1000, 0x0600, 0x0300, 0x0180, 0x00C0, 0x0070, 0x0050, 0x0038, 0x0028, 0x001C, 0x0014, 0x0002, 0x000E, 0x000A, 0x0006, 0xFFFF }, + { 0x2000, 0x0A00, 0x0500, 0x0280, 0x0140, 0x00A0, 0x0068, 0x0048, 0x0034, 0x0024, 0x001A, 0x0012, 0x000D, 0x0009, 0x0005, 0xFFFF, 0xFFFF }, + { 0x1800, 0x0C00, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0060, 0x0030, 0x0020, 0x0018, 0x0010, 0x000C, 0x0008, 0xFFFF, 0xFFFF, 0xFFFF } + }, + { + { 0xC000, 0x2C00, 0x1C00, 0x0E00, 0x0700, 0x0400, 0x0380, 0x01E0, 0x0160, 0x00F0, 0x00B0, 0x0080, 0x0078, 0x0058, 0x0038, 0x0024, 0x001C }, + { 0x8000, 0x3800, 0x2800, 0x1800, 0x0C00, 0x0600, 0x0300, 0x01C0, 0x0140, 0x00E0, 0x00A0, 0x0070, 0x0050, 0x002C, 0x0020, 0x0018, 0xFFFF }, + { 0x6000, 0x2400, 0x1400, 0x0A00, 0x0500, 0x0280, 0x01A0, 0x0120, 0x00D0, 0x0090, 0x0068, 0x0048, 0x0030, 0x0028, 0x0014, 0xFFFF, 0xFFFF }, + { 0x5000, 0x4000, 0x3000, 0x2000, 0x1000, 0x0800, 0x0200, 0x0180, 0x0100, 0x00C0, 0x0060, 0x0040, 0x0008, 0x0010, 0xFFFF, 
0xFFFF, 0xFFFF } + }, + { + { 0xF000, 0x3C00, 0x2C00, 0x2000, 0x1E00, 0x1600, 0x1200, 0x1000, 0x0F00, 0x0B00, 0x0780, 0x0580, 0x0400, 0x0340, 0x0240, 0x0140, 0x0040 }, + { 0xE000, 0x7800, 0x6000, 0x5000, 0x4000, 0x3800, 0x2800, 0x1C00, 0x0E00, 0x0A00, 0x0700, 0x0500, 0x0380, 0x0300, 0x0200, 0x0100, 0xFFFF }, + { 0xD000, 0x7000, 0x5800, 0x4800, 0x3400, 0x2400, 0x1A00, 0x1400, 0x0D00, 0x0900, 0x0680, 0x0480, 0x02C0, 0x01C0, 0x00C0, 0xFFFF, 0xFFFF }, + { 0xC000, 0xB000, 0xA000, 0x9000, 0x8000, 0x6800, 0x3000, 0x1800, 0x0C00, 0x0800, 0x0600, 0x0280, 0x0180, 0x0080, 0xFFFF, 0xFFFF, 0xFFFF } + } + }; + + static const uint16_t masktab[3][4][17] = + { + { + { 0x8000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }, + { 0xC000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000 }, + { 0xE000, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000 }, + { 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000 } + }, + { + { 0xC000, 0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC }, + { 0xC000, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFC, 0x0000 }, + { 0xE000, 0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0x0000, 0x0000 }, + { 0xF000, 0xF000, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0x0000, 0x0000, 0x0000 } + }, + { + { 0xF000, 0xFC00, 0xFC00, 0xFC00, 0xFE00, 0xFE00, 0xFE00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0xFFC0 }, + { 0xF000, 0xF800, 0xF800, 0xF800, 0xF800, 
0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0x0000 }, + { 0xF000, 0xF800, 0xF800, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0x0000, 0x0000 }, + { 0xF000, 0xF000, 0xF000, 0xF000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0x0000, 0x0000, 0x0000 } + } + }; + + int code; + // vlcnum is the index of Table used to code coeff_token + // vlcnum==3 means (8<=nC) which uses 6bit FLC + + if (vlcnum == 3) + { + // read 6 bit FLC + //code = ShowBits(buf, frame_bitoffset, BitstreamLengthInBytes, 6); + code = ShowBits(buf, frame_bitoffset, BitstreamLengthInBits, 6); + currStream->frame_bitoffset += 6; + sym->value2 = (code & 3); + sym->value1 = (code >> 2); + + if (!sym->value1 && sym->value2 == 3) + { + // #c = 0, #t1 = 3 => #c = 0 + sym->value2 = 0; + } + else + sym->value1++; + } + else + { + //retval = code_from_bitstream_2d(sym, currStream, &lentab[vlcnum][0][0], &codtab[vlcnum][0][0], 17, 4, &code); + code = opt_code_from_bitstream_2d_17_4(sym, currStream, lentab[vlcnum][0], codtab[vlcnum][0], masktab[vlcnum][0]); + } + + return 0; +} + + +/*! 
+************************************************************************ +* \brief +* read NumCoeff/TrailingOnes codeword from UVLC-partition ChromaDC +************************************************************************ +*/ +int readSyntaxElement_NumCoeffTrailingOnesChromaDC(VideoParameters *p_Vid, SyntaxElement *sym, Bitstream *currStream) +{ +#if 0 + static const uint8_t lentab[3][4][17] = + { + //YUV420 + {{ 2, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 3, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV422 + {{ 1, 7, 7, 9, 9,10,11,12,13, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 7, 9,10,11,12,12, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 3, 7, 7, 9,10,11,12, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 5, 6, 7, 7,10,11, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV444 + {{ 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16}, + { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16}, + { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16}, + { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16}} + }; +#endif + + //YUV420 + + __declspec(align(32)) static const uint8_t lentab420[16] = + { 2, 6, 6, 6, 6, 1, 6, 7, 8, 3, 7, 8, 0, 0, 6, 7 }; + __declspec(align(32)) static const uint8_t codtab420[16] = + { 0x40, 0x1C, 0x10, 0x0C, 0x08, 0x80, 0x18, 0x06, 0x03, 0x20, 0x04, 0x02, 0xFF, 0xFF, 0x14, 0x00 }; + __declspec(align(32)) static const uint8_t masktab420[16] = + { 0xC0, 0xFC, 0xFC, 0xFC, 0xFC, 0x80, 0xFC, 0xFE, 0xFF, 0xE0, 0xFE, 0xFF, 0x00, 0x00, 0xFC, 0xFE }; + + + // YUV422 + __declspec(align(32)) static const uint16_t lentab422[4][9] = + { + { 1, 7, 7, 9, 9,10,11,12,13 }, + { 2, 7, 7, 9,10,11,12,12, 0 }, + { 3, 7, 7, 9,10,11,12, 0, 0 }, + { 5, 6, 7, 7,10,11, 0, 0, 0 } + }; + __declspec(align(32)) static const uint16_t codtab422[4][9] = + { + { 0x8000, 0x1E00, 0x1C00, 0x0380, 0x0300, 0x01C0, 0x00E0, 0x0070, 0x0038 }, + { 0x4000, 0x1A00, 0x1800, 0x0280, 0x0180, 
0x00C0, 0x0060, 0x0050, 0xFFFF }, + { 0x2000, 0x1600, 0x1400, 0x0200, 0x0140, 0x00A0, 0x0040, 0xFFFF, 0xFFFF }, + { 0x0800, 0x0400, 0x1200, 0x1000, 0x0100, 0x0080, 0xFFFF, 0xFFFF, 0xFFFF } + }; + __declspec(align(32)) static const uint16_t masktab422[4][9] = + { + { 0x8000, 0xFE00, 0xFE00, 0xFF80, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF0, 0xFFF8 }, + { 0xC000, 0xFE00, 0xFE00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF0, 0xFFF0, 0x0000 }, + { 0xE000, 0xFE00, 0xFE00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF0, 0x0000, 0x0000 }, + { 0xF800, 0xFC00, 0xFE00, 0xFE00, 0xFFC0, 0xFFE0, 0x0000, 0x0000, 0x0000 } + }; + + // YUV444 + __declspec(align(32)) static const uint16_t lentab444[4][17] = + { + { 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16}, + { 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16, 0}, + { 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16, 0, 0}, + { 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16, 0, 0, 0} + }; + __declspec(align(32)) static const uint16_t codtab444[4][17] = + { + { 0x8000, 0x1400, 0x0700, 0x0380, 0x01C0, 0x00E0, 0x0078, 0x0058, 0x0040, 0x003C, 0x002C, 0x001E, 0x0016, 0x000F, 0x000B, 0x0007, 0x0004 }, + { 0x4000, 0x1000, 0x0600, 0x0300, 0x0180, 0x00C0, 0x0070, 0x0050, 0x0038, 0x0028, 0x001C, 0x0014, 0x0002, 0x000E, 0x000A, 0x0006, 0xFFFF }, + { 0x2000, 0x0A00, 0x0500, 0x0280, 0x0140, 0x00A0, 0x0068, 0x0048, 0x0034, 0x0024, 0x001A, 0x0012, 0x000D, 0x0009, 0x0005, 0xFFFF, 0xFFFF }, + { 0x1800, 0x0C00, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0060, 0x0030, 0x0020, 0x0018, 0x0010, 0x000C, 0x0008, 0xFFFF, 0xFFFF, 0xFFFF } + }; + __declspec(align(32)) static const uint16_t masktab444[4][17] = + { + { 0x8000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }, + { 0xC000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000 }, + { 0xE000, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 
0xFFFF, 0xFFFF, 0x0000, 0x0000 }, + { 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000 } + }; + +#if 0 + static const uint8_t codtab[3][4][17] = + { + //YUV420 + {{ 1, 7, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV422 + {{ 1,15,14, 7, 6, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1,13,12, 5, 6, 6, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 1,11,10, 4, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 1, 1, 9, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV444 + {{ 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7, 4}, + { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10, 6}, + { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9, 5}, + { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12, 8}} + }; +#endif + + int code; + int yuv = p_Vid->active_sps->chroma_format_idc - 1; + switch(yuv) + { + case 0: + code = opt_code_from_bitstream_2d_5_4(sym, currStream, lentab420, codtab420, masktab420); + break; + case 1: + code = code_from_bitstream_2d_9_4(sym, currStream, lentab422[0], codtab422[0], masktab422[0]); + break; + case 2: + code = opt_code_from_bitstream_2d_17_4(sym, currStream, lentab444[0], codtab444[0], masktab444[0]); + break; + default: + __assume(0); + return -1; + } + + return 0; +} + +/*! 
+************************************************************************ +* \brief +* read Level VLC0 codeword from UVLC-partition +************************************************************************ +*/ +int readSyntaxElement_Level_VLC0(Bitstream *currStream) +{ + int frame_bitoffset = currStream->frame_bitoffset; + int BitstreamLengthInBytes = currStream->bitstream_length; + int BitstreamLengthInBits = (BitstreamLengthInBytes << 3) + 7; + byte *buf = currStream->streamBuffer; + int len = 1, sign = 0, level = 0, code = 1; + + while (!ShowBits(buf, frame_bitoffset++, BitstreamLengthInBits, 1)) + len++; + + if (len < 15) + { + sign = (len - 1) & 1; + level = ((len - 1) >> 1) + 1; + } + else if (len == 15) + { + // escape code + code <<= 4; + code |= ShowBits(buf, frame_bitoffset, BitstreamLengthInBits, 4); + len += 4; + frame_bitoffset += 4; + sign = (code & 0x01); + level = ((code >> 1) & 0x07) + 8; + } + else if (len >= 16) + { + // escape code + int addbit = (len - 16); + int offset = (2048 << addbit) - 2032; + len -= 4; + code = ShowBits(buf, frame_bitoffset, BitstreamLengthInBits, len); + sign = (code & 0x01); + frame_bitoffset += len; + level = (code >> 1) + offset; + + code |= (1 << (len)); // for display purpose only + len += addbit + 16; + } + currStream->frame_bitoffset = frame_bitoffset; + return (sign) ? -level : level ; + //sym->len = len; + +#if TRACE + tracebits2(sym->tracestring, sym->len, code); +#endif + + + return 0; +} + +/*! 
+************************************************************************ +* \brief +* read Level VLC codeword from UVLC-partition +************************************************************************ +*/ +int readSyntaxElement_Level_VLCN(int vlc, Bitstream *currStream) +{ + int frame_bitoffset = currStream->frame_bitoffset; + int BitstreamLengthInBytes = currStream->bitstream_length; + int BitstreamLengthInBits = (BitstreamLengthInBytes << 3) + 7; + byte *buf = currStream->streamBuffer; + + int levabs, sign; + int len = 1; + int code = 1, sb; + + int shift = vlc - 1; + + // read pre zeros + while (!ShowBits(buf, frame_bitoffset ++, BitstreamLengthInBits, 1)) + len++; + + frame_bitoffset -= len; + + if (len < 16) + { + levabs = ((len - 1) << shift) + 1; + + // read (vlc-1) bits -> suffix + if (shift) + { + sb = ShowBits(buf, frame_bitoffset + len, BitstreamLengthInBits, shift); + code = (code << (shift) )| sb; + levabs += sb; + len += (shift); + } + + // read 1 bit -> sign + sign = ShowBits(buf, frame_bitoffset + len, BitstreamLengthInBits, 1); + code = (code << 1)| sign; + len ++; + } + else // escape + { + int addbit = len - 5; + int offset = (1 << addbit) + (15 << shift) - 2047; + + sb = ShowBits(buf, frame_bitoffset + len, BitstreamLengthInBits, addbit); + code = (code << addbit ) | sb; + len += addbit; + + levabs = sb + offset; + + // read 1 bit -> sign + sign = ShowBits(buf, frame_bitoffset + len, BitstreamLengthInBits, 1); + + code = (code << 1)| sign; + + len++; + } + + currStream->frame_bitoffset = frame_bitoffset + len; + return (sign)? -levabs : levabs; +} + +/*! 
+************************************************************************ +* \brief +* read Total Zeros codeword from UVLC-partition +************************************************************************ +*/ +int readSyntaxElement_TotalZeros(Bitstream *currStream, int vlcnum) +{ + __declspec(align(32)) static const uint16_t lentab[TOTRUN_NUM][16] = + { + + { 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9}, + { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6}, + { 4,3,3,3,4,4,3,3,4,5,5,6,5,6}, + { 5,3,4,4,3,3,3,4,3,4,5,5,5}, + { 4,4,4,3,3,3,3,3,4,5,4,5}, + { 6,5,3,3,3,3,3,3,4,3,6}, + { 6,5,3,3,3,2,3,4,3,6}, + { 6,4,5,3,2,2,3,3,6}, + { 6,6,4,2,2,3,2,5}, + { 5,5,3,2,2,2,4}, + { 4,4,3,3,1,3}, + { 4,4,2,1,3}, + { 3,3,1,2}, + { 2,2,1}, + { 1,1}, + }; +/* + static const byte codtab[TOTRUN_NUM][16] = + { + {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1}, + {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0}, + {5,7,6,5,4,3,4,3,2,3,2,1,1,0}, + {3,7,5,4,6,5,4,3,3,2,2,1,0}, + {5,4,3,7,6,5,4,3,2,1,1,0}, + {1,1,7,6,5,4,3,2,1,1,0}, + {1,1,5,4,3,3,2,1,1,0}, + {1,1,1,3,3,2,2,1,0}, + {1,0,1,3,2,1,1,1,}, + {1,0,1,3,2,1,1,}, + {0,1,1,2,1,3}, + {0,1,1,1,1}, + {0,1,1,1}, + {0,1,1}, + {0,1}, + };*/ + + __declspec(align(32)) static const uint16_t codtab[TOTRUN_NUM][16] = + { +{ 0x8000, 0x6000, 0x4000, 0x3000, 0x2000, 0x1800, 0x1000, 0x0C00, 0x0800, 0x0600, 0x0400, 0x0300, 0x0200, 0x0180, 0x0100, 0x0080, }, +{ 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x5000, 0x4000, 0x3000, 0x2000, 0x1800, 0x1000, 0x0C00, 0x0800, 0x0400, 0x0000, 0xFFFF, }, +{ 0x5000, 0xE000, 0xC000, 0xA000, 0x4000, 0x3000, 0x8000, 0x6000, 0x2000, 0x1800, 0x1000, 0x0400, 0x0800, 0x0000, 0xFFFF, 0xFFFF, }, +{ 0x1800, 0xE000, 0x5000, 0x4000, 0xC000, 0xA000, 0x8000, 0x3000, 0x6000, 0x2000, 0x1000, 0x0800, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x5000, 0x4000, 0x3000, 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x2000, 0x0800, 0x1000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0400, 0x0800, 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x4000, 0x1000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 
0xFFFF, 0xFFFF, }, +{ 0x0400, 0x0800, 0xA000, 0x8000, 0x6000, 0xC000, 0x4000, 0x1000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0400, 0x1000, 0x0800, 0x6000, 0xC000, 0x8000, 0x4000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0400, 0x0000, 0x1000, 0xC000, 0x8000, 0x2000, 0x4000, 0x0800, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0800, 0x0000, 0x2000, 0xC000, 0x8000, 0x4000, 0x1000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0000, 0x1000, 0x2000, 0x4000, 0x8000, 0x6000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0000, 0x1000, 0x4000, 0x8000, 0x2000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0000, 0x2000, 0x8000, 0x4000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0000, 0x4000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x0000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, } + }; + __declspec(align(32)) static const uint16_t masktab[TOTRUN_NUM][16] = + { +{ 0x8000, 0xE000, 0xE000, 0xF000, 0xF000, 0xF800, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFF80, }, +{ 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF000, 0xF000, 0xF000, 0xF800, 0xF800, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0x0000, }, +{ 0xF000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF000, 0xE000, 0xE000, 0xF000, 0xF800, 0xF800, 0xFC00, 0xF800, 0xFC00, 0x0000, 0x0000, }, +{ 0xF800, 0xE000, 0xF000, 0xF000, 0xE000, 0xE000, 0xE000, 0xF000, 0xE000, 0xF000, 0xF800, 0xF800, 0xF800, 0x0000, 0x0000, 0x0000, }, +{ 0xF000, 0xF000, 0xF000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF800, 0xF000, 0xF800, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xFC00, 0xF800, 
0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xE000, 0xFC00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xFC00, 0xF800, 0xE000, 0xE000, 0xE000, 0xC000, 0xE000, 0xF000, 0xE000, 0xFC00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xFC00, 0xF000, 0xF800, 0xE000, 0xC000, 0xC000, 0xE000, 0xE000, 0xFC00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xFC00, 0xFC00, 0xF000, 0xC000, 0xC000, 0xE000, 0xC000, 0xF800, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xF800, 0xF800, 0xE000, 0xC000, 0xC000, 0xC000, 0xF000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xF000, 0xF000, 0xE000, 0xE000, 0x8000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xF000, 0xF000, 0xC000, 0x8000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xE000, 0xE000, 0x8000, 0xC000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xC000, 0xC000, 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0x8000, 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +}; + + + return opt_code_from_bitstream_2d_16_1(currStream, lentab[vlcnum], codtab[vlcnum], masktab[vlcnum]); +} + +/*! 
+************************************************************************ +* \brief +* read Total Zeros Chroma DC codeword from UVLC-partition +************************************************************************ +*/ +int readSyntaxElement_TotalZerosChromaDC(VideoParameters *p_Vid, Bitstream *currStream, int vlcnum) +{ + static const byte lentab[3][TOTRUN_NUM][16] = + { + //YUV420 + {{ 1,2,3,3}, + { 1,2,2}, + { 1,1}}, + //YUV422 + {{ 1,3,3,4,4,4,5,5}, + { 3,2,3,3,3,3,3}, + { 3,3,2,2,3,3}, + { 3,2,2,2,3}, + { 2,2,2,2}, + { 2,2,1}, + { 1,1}}, + //YUV444 + {{ 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9}, + { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6}, + { 4,3,3,3,4,4,3,3,4,5,5,6,5,6}, + { 5,3,4,4,3,3,3,4,3,4,5,5,5}, + { 4,4,4,3,3,3,3,3,4,5,4,5}, + { 6,5,3,3,3,3,3,3,4,3,6}, + { 6,5,3,3,3,2,3,4,3,6}, + { 6,4,5,3,2,2,3,3,6}, + { 6,6,4,2,2,3,2,5}, + { 5,5,3,2,2,2,4}, + { 4,4,3,3,1,3}, + { 4,4,2,1,3}, + { 3,3,1,2}, + { 2,2,1}, + { 1,1}} + }; + + static const byte codtab[3][TOTRUN_NUM][16] = + { + //YUV420 + {{ 1,1,1,0}, + { 1,1,0}, + { 1,0}}, + //YUV422 + {{ 1,2,3,2,3,1,1,0}, + { 0,1,1,4,5,6,7}, + { 0,1,1,2,6,7}, + { 6,0,1,2,7}, + { 0,1,2,3}, + { 0,1,1}, + { 0,1}}, + //YUV444 + {{1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1}, + {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0}, + {5,7,6,5,4,3,4,3,2,3,2,1,1,0}, + {3,7,5,4,6,5,4,3,3,2,2,1,0}, + {5,4,3,7,6,5,4,3,2,1,1,0}, + {1,1,7,6,5,4,3,2,1,1,0}, + {1,1,5,4,3,3,2,1,1,0}, + {1,1,1,3,3,2,2,1,0}, + {1,0,1,3,2,1,1,1,}, + {1,0,1,3,2,1,1,}, + {0,1,1,2,1,3}, + {0,1,1,1,1}, + {0,1,1,1}, + {0,1,1}, + {0,1}} + }; + + int yuv = p_Vid->active_sps->chroma_format_idc - 1; + return code_from_bitstream_2d_16_1(currStream, &lentab[yuv][vlcnum][0], &codtab[yuv][vlcnum][0]); +} + + +/*! 
+************************************************************************ +* \brief +* read Run codeword from UVLC-partition +************************************************************************ +*/ +int readSyntaxElement_Run(Bitstream *currStream, int vlcnum) +{ + __declspec(align(32)) static const uint16_t lentab[TOTRUN_NUM][16] = + { + {1,1}, + {1,2,2}, + {2,2,2,2}, + {2,2,2,3,3}, + {2,2,3,3,3,3}, + {2,3,3,3,3,3,3}, + {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11}, + }; +/* + static const byte codtab[TOTRUN_NUM][16] = + { + {1,0}, + {1,1,0}, + {3,2,1,0}, + {3,2,1,1,0}, + {3,2,3,2,1,0}, + {3,0,1,3,2,5,4}, + {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1}, + };*/ + + __declspec(align(32)) static const uint16_t codtab[TOTRUN_NUM][16] = + { +{ 0x8000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0x8000, 0x4000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0xC000, 0x8000, 0x4000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0xC000, 0x8000, 0x4000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0xC000, 0x8000, 0x6000, 0x4000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0xC000, 0x0000, 0x2000, 0x6000, 0x4000, 0xA000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }, +{ 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0xFFFF, } + }; + __declspec(align(32)) static const uint16_t masktab[TOTRUN_NUM][16] = +{ +{ 0x8000, 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0x8000, 0xC000, 0xC000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, }, +{ 0xC000, 0xC000, 0xC000, 0xC000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xC000, 0xC000, 0xC000, 0xE000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xC000, 0xC000, 0xE000, 0xE000, 0xE000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xC000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }, +{ 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0x0000, } + }; + return opt_code_from_bitstream_2d_16_1(currStream, lentab[vlcnum], codtab[vlcnum], masktab[vlcnum]); +} + + +/*! +************************************************************************ +* \brief +* Reads bits from the bitstream buffer +* +* \param buffer +* containing VLC-coded data bits +* \param totbitoffset +* bit offset from start of partition +* \param info +* returns value of the read bits +* \param bitcount +* total bytes in bitstream +* \param numbits +* number of bits to read +* +************************************************************************ +*/ + +int GetBits (const uint8_t buffer[],int totbitoffset,int *info, int bitcount, + int numbits) +{ + int bitoffset = /*7 - */(totbitoffset & 0x07); // bit from start of byte + int byteoffset = (totbitoffset >> 3); // byte from start of buffer + const uint8_t *ptr = &(buffer[byteoffset]); + + uint32_t tmp = (ptr[0] << 24) | (ptr[1] << 16) | (ptr[2] << 8) | (ptr[3]); + tmp <<= bitoffset; + tmp >>= 32 - numbits; + *info = tmp; + return numbits; +} + +/*! 
/*!
 ************************************************************************
 * \brief
 *    Reads bits from the bitstream buffer
 *
 * \param buffer
 *    buffer containing VLC-coded data bits
 * \param totbitoffset
 *    bit offset from start of partition
 * \param bitcount
 *    size of the bitstream; accepted for interface compatibility but
 *    not checked
 * \param numbits
 *    number of bits to read
 *
 * \return
 *    the numbits bits starting at totbitoffset, most significant bit
 *    first, or 0 when numbits is not positive
 *
 * \note
 *    Always loads the four bytes starting at totbitoffset / 8, so the
 *    buffer must be readable that far.  Only that 32-bit window is
 *    examined: (totbitoffset & 7) + numbits must not exceed 32 or the
 *    low-order bits of the result are lost.
 ************************************************************************
 */
static int ShowBits (const uint8_t buffer[],int totbitoffset,int bitcount, int numbits)
{
  const uint8_t *ptr = &buffer[totbitoffset >> 3]; // byte from start of buffer
  int bitoffset = totbitoffset & 0x07;             // bit from start of byte
  uint32_t window;

  (void)bitcount;

  // a shift by 32 - 0 would be undefined, so answer the empty read directly
  if (numbits <= 0)
    return 0;

  // widen before shifting: a promoted byte >= 0x80 shifted by 24 overflows int
  window = ((uint32_t)ptr[0] << 24) | ((uint32_t)ptr[1] << 16) |
           ((uint32_t)ptr[2] <<  8) |  (uint32_t)ptr[3];
  window <<= bitoffset;
  window >>= 32 - numbits;

  return (int)window;
}
<Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release IPP|Win32"> + <Configuration>Release IPP</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release IPP|x64"> + <Configuration>Release IPP</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <VCProjectVersion>17.0</VCProjectVersion> + <ProjectName>h264dec</ProjectName> + <ProjectGuid>{5499B067-CF32-4141-A757-E0A29866994A}</ProjectGuid> + <RootNamespace>ldecod</RootNamespace> + <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <PlatformToolset>v142</PlatformToolset> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <PlatformToolset>v142</PlatformToolset> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <PlatformToolset>v142</PlatformToolset> + 
<UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <PlatformToolset>v142</PlatformToolset> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <PlatformToolset>v142</PlatformToolset> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <PlatformToolset>v142</PlatformToolset> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" /> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" /> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" 
Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" /> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" /> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" /> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <OutDir>$(PlatformShortName)_$(Configuration)\</OutDir> + <IntDir>$(PlatformShortName)_$(Configuration)\</IntDir> + <IncludePath>$(IncludePath)</IncludePath> + <LibraryPath>$(LibraryPath)</LibraryPath> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <IncludePath>$(IncludePath)</IncludePath> + <LibraryPath>$(LibraryPath)</LibraryPath> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + 
<OutDir>$(PlatformShortName)_$(Configuration)\</OutDir> + <IntDir>$(PlatformShortName)_$(Configuration)\</IntDir> + <IncludePath>$(IncludePath)</IncludePath> + <LibraryPath>$(LibraryPath)</LibraryPath> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <IncludePath>$(IncludePath)</IncludePath> + <LibraryPath>$(LibraryPath)</LibraryPath> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'"> + <OutDir>$(PlatformShortName)_$(Configuration)\</OutDir> + <IntDir>$(PlatformShortName)_$(Configuration)\</IntDir> + <IncludePath>$(IncludePath)</IncludePath> + <LibraryPath>$(LibraryPath)</LibraryPath> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'"> + <IncludePath>$(IncludePath)</IncludePath> + <LibraryPath>$(LibraryPath)</LibraryPath> + </PropertyGroup> + <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <VcpkgConfiguration>Debug</VcpkgConfiguration> + <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet> + </PropertyGroup> + <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet> + <VcpkgConfiguration>Debug</VcpkgConfiguration> + </PropertyGroup> + <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet> + </PropertyGroup> + <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <TypeLibraryName>.\ldecod/Debug_vc8/ldecod.tlb</TypeLibraryName> + <HeaderFileName /> + </Midl> + <ClCompile> + <Optimization>Disabled</Optimization> + 
<AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary> + <PrecompiledHeader>NotUsing</PrecompiledHeader> + <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile> + <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName> + <BrowseInformation>true</BrowseInformation> + <WarningLevel>Level3</WarningLevel> + <SuppressStartupBanner>true</SuppressStartupBanner> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <CompileAs>Default</CompileAs> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <Culture>0x0409</Culture> + </ResourceCompile> + <Lib> + <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> + <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies> + </Lib> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <Midl> + <TypeLibraryName>.\ldecod/Debug_vc8/ldecod.tlb</TypeLibraryName> + <HeaderFileName> + </HeaderFileName> + </Midl> + <ClCompile> + <Optimization>Disabled</Optimization> + <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary> + <PrecompiledHeader>NotUsing</PrecompiledHeader> + <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile> + <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName> + <BrowseInformation>true</BrowseInformation> + <WarningLevel>Level3</WarningLevel> + <SuppressStartupBanner>true</SuppressStartupBanner> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <CompileAs>Default</CompileAs> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <Culture>0x0409</Culture> + </ResourceCompile> + <Lib> + <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> + <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies> + </Lib> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName> + <HeaderFileName /> + </Midl> + <ClCompile> + <Optimization>Full</Optimization> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <IntrinsicFunctions>true</IntrinsicFunctions> + <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <StringPooling>true</StringPooling> + <ExceptionHandling /> + <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>true</FunctionLevelLinking> + <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet> + <WarningLevel>Level3</WarningLevel> + <SuppressStartupBanner>true</SuppressStartupBanner> + <DebugInformationFormat>None</DebugInformationFormat> + <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <Culture>0x0409</Culture> + </ResourceCompile> + <Lib> + <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> + <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies> + </Lib> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <Midl> + <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName> + <HeaderFileName> + </HeaderFileName> + </Midl> + <ClCompile> + <Optimization>Full</Optimization> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <IntrinsicFunctions>true</IntrinsicFunctions> + <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <StringPooling>true</StringPooling> + <ExceptionHandling> + </ExceptionHandling> + 
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>true</FunctionLevelLinking> + <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet> + <WarningLevel>Level3</WarningLevel> + <SuppressStartupBanner>true</SuppressStartupBanner> + <DebugInformationFormat>None</DebugInformationFormat> + <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <Culture>0x0409</Culture> + </ResourceCompile> + <Lib> + <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> + <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies> + </Lib> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'"> + <Midl> + <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName> + <HeaderFileName /> + </Midl> + <ClCompile> + <Optimization>Full</Optimization> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <IntrinsicFunctions>true</IntrinsicFunctions> + <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed> + <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <StringPooling>true</StringPooling> + <ExceptionHandling /> + <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> + <BufferSecurityCheck>false</BufferSecurityCheck> + <WarningLevel>Level3</WarningLevel> + <SuppressStartupBanner>true</SuppressStartupBanner> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + 
<DisableSpecificWarnings>4101;%(DisableSpecificWarnings)</DisableSpecificWarnings> + <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <Culture>0x0409</Culture> + </ResourceCompile> + <Lib> + <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> + <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies> + </Lib> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'"> + <Midl> + <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName> + <HeaderFileName> + </HeaderFileName> + </Midl> + <ClCompile> + <Optimization>Full</Optimization> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <IntrinsicFunctions>true</IntrinsicFunctions> + <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed> + <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <StringPooling>true</StringPooling> + <ExceptionHandling> + </ExceptionHandling> + <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> + <BufferSecurityCheck>false</BufferSecurityCheck> + <WarningLevel>Level3</WarningLevel> + <SuppressStartupBanner>true</SuppressStartupBanner> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4101;%(DisableSpecificWarnings)</DisableSpecificWarnings> + <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <Culture>0x0409</Culture> + </ResourceCompile> + <Lib> + 
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> + <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies> + </Lib> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="ldecod\src\biari.asm"> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi %(FullPath) +</Command> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi %(FullPath) +</Command> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)biarix86.obj;%(Outputs)</Outputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)biarix86.obj;%(Outputs)</Outputs> + <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)" +</Command> + <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)" +</Command> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">$(IntDir)biarix86.obj;%(Outputs)</Outputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">$(IntDir)biarix86.obj;%(Outputs)</Outputs> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi %(FullPath) +</Command> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi %(FullPath) +</Command> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)biarix86.obj;%(Outputs)</Outputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)biarix86.obj;%(Outputs)</Outputs> + 
</CustomBuild> + <CustomBuild Include="ldecod\src\macroblock.asm"> + <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">ml /c /nologo /Fo"$(IntDir)macroblockx86.obj" /Zi "%(FullPath)" +</Command> + <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">ml /c /nologo /Fo"$(IntDir)macroblockx86.obj" /Zi "%(FullPath)" +</Command> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">$(IntDir)macroblockx86.obj;%(Outputs)</Outputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">$(IntDir)macroblockx86.obj;%(Outputs)</Outputs> + </CustomBuild> + <CustomBuild Include="ldecod\src\prediction.asm"> + <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">ml /c /nologo /Fo"$(IntDir)predictionx86.obj" /Zi "%(FullPath)" +</Command> + <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">ml /c /nologo /Fo"$(IntDir)predictionx86.obj" /Zi "%(FullPath)" +</Command> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">$(IntDir)predictionx86.obj;%(Outputs)</Outputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">$(IntDir)predictionx86.obj;%(Outputs)</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ClCompile Include="dec_api.c" /> + <ClCompile Include="lcommon\src\memalloc.c" /> + <ClCompile Include="lcommon\src\memcache.c" /> + <ClCompile Include="lcommon\src\mv_prediction.c" /> + <ClCompile Include="lcommon\src\parsetcommon.c" /> + <ClCompile Include="lcommon\src\transform.c" /> + <ClCompile Include="lcommon\src\win32.c" /> + <ClCompile Include="ldecod\src\biaridecod.c" /> + <ClCompile Include="ldecod\src\block.c" /> + <ClCompile Include="ldecod\src\cabac.c" /> + <ClCompile Include="ldecod\src\context_ini.c" /> + <ClCompile Include="ldecod\src\erc_api.c" /> + <ClCompile Include="ldecod\src\erc_do_i.c" /> + <ClCompile Include="ldecod\src\erc_do_p.c" /> + <ClCompile Include="ldecod\src\errorconcealment.c" /> + 
<ClCompile Include="ldecod\src\filter_chroma_horiz.c" /> + <ClCompile Include="ldecod\src\filter_chroma_vert.c" /> + <ClCompile Include="ldecod\src\filter_luma_horiz.c" /> + <ClCompile Include="ldecod\src\filter_luma_vert.c" /> + <ClCompile Include="ldecod\src\fmo.c" /> + <ClCompile Include="ldecod\src\header.c" /> + <ClCompile Include="ldecod\src\image.c" /> + <ClCompile Include="ldecod\src\intra16x16_pred.c" /> + <ClCompile Include="ldecod\src\intra4x4_pred.c" /> + <ClCompile Include="ldecod\src\intra8x8_pred.c" /> + <ClCompile Include="ldecod\src\intra_chroma_pred.c" /> + <ClCompile Include="ldecod\src\ldecod.c" /> + <ClCompile Include="ldecod\src\loopFilter.c" /> + <ClCompile Include="ldecod\src\macroblock.c" /> + <ClCompile Include="ldecod\src\mbuffer.c" /> + <ClCompile Include="ldecod\src\mb_access.c" /> + <ClCompile Include="ldecod\src\mb_prediction.c" /> + <ClCompile Include="ldecod\src\mc_prediction.c" /> + <ClCompile Include="ldecod\src\meminput.c" /> + <ClCompile Include="ldecod\src\nal.c" /> + <ClCompile Include="ldecod\src\nalu.c" /> + <ClCompile Include="ldecod\src\nalucommon.c" /> + <ClCompile Include="ldecod\src\output.c" /> + <ClCompile Include="ldecod\src\parset.c" /> + <ClCompile Include="ldecod\src\quant.c" /> + <ClCompile Include="ldecod\src\sei.c" /> + <ClCompile Include="ldecod\src\storable_picture.c" /> + <ClCompile Include="ldecod\src\strength_horiz.c" /> + <ClCompile Include="ldecod\src\strength_vert.c" /> + <ClCompile Include="ldecod\src\transform8x8.c" /> + <ClCompile Include="ldecod\src\vlc.c" /> + </ItemGroup> + <ItemGroup> + <ClInclude Include="dec_api.h" /> + <ClInclude Include="lcommon\inc\ctx_tables.h" /> + <ClInclude Include="lcommon\inc\frame.h" /> + <ClInclude Include="lcommon\inc\ifunctions.h" /> + <ClInclude Include="lcommon\inc\mb_access.h" /> + <ClInclude Include="lcommon\inc\memalloc.h" /> + <ClInclude Include="lcommon\inc\memcache.h" /> + <ClInclude Include="lcommon\inc\mv_prediction.h" /> + <ClInclude 
Include="lcommon\inc\nalucommon.h" /> + <ClInclude Include="lcommon\inc\transform.h" /> + <ClInclude Include="lcommon\inc\types.h" /> + <ClInclude Include="lcommon\inc\win32.h" /> + <ClInclude Include="ldecod\inc\biaridecod.h" /> + <ClInclude Include="ldecod\inc\block.h" /> + <ClInclude Include="ldecod\inc\cabac.h" /> + <ClInclude Include="ldecod\inc\context_ini.h" /> + <ClInclude Include="ldecod\inc\contributors.h" /> + <ClInclude Include="ldecod\inc\defines.h" /> + <ClInclude Include="ldecod\inc\elements.h" /> + <ClInclude Include="ldecod\inc\erc_api.h" /> + <ClInclude Include="ldecod\inc\erc_do.h" /> + <ClInclude Include="ldecod\inc\erc_globals.h" /> + <ClInclude Include="ldecod\inc\errorconcealment.h" /> + <ClInclude Include="ldecod\inc\fmo.h" /> + <ClInclude Include="ldecod\inc\global.h" /> + <ClInclude Include="ldecod\inc\header.h" /> + <ClInclude Include="ldecod\inc\image.h" /> + <ClInclude Include="ldecod\inc\intra16x16_pred.h" /> + <ClInclude Include="ldecod\inc\intra4x4_pred.h" /> + <ClInclude Include="ldecod\inc\intra8x8_pred.h" /> + <ClInclude Include="ldecod\inc\leaky_bucket.h" /> + <ClInclude Include="ldecod\inc\loopfilter.h" /> + <ClInclude Include="ldecod\inc\macroblock.h" /> + <ClInclude Include="ldecod\inc\mbuffer.h" /> + <ClInclude Include="ldecod\inc\mc_prediction.h" /> + <ClInclude Include="ldecod\inc\meminput.h" /> + <ClInclude Include="ldecod\inc\nalu.h" /> + <ClInclude Include="ldecod\inc\optim.h" /> + <ClInclude Include="ldecod\inc\output.h" /> + <ClInclude Include="ldecod\inc\parset.h" /> + <ClInclude Include="ldecod\inc\parsetcommon.h" /> + <ClInclude Include="ldecod\inc\quant.h" /> + <ClInclude Include="ldecod\inc\sei.h" /> + <ClInclude Include="ldecod\inc\transform8x8.h" /> + <ClInclude Include="ldecod\inc\vlc.h" /> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project>
\ No newline at end of file diff --git a/Src/h264dec/ldecod_vc9.vcxproj.filters b/Src/h264dec/ldecod_vc9.vcxproj.filters new file mode 100644 index 00000000..00bcc7d3 --- /dev/null +++ b/Src/h264dec/ldecod_vc9.vcxproj.filters @@ -0,0 +1,304 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{303af6ae-839c-47f8-9a67-adb97270c1cc}</UniqueIdentifier> + <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions> + </Filter> + <Filter Include="Source Files\deblocking filter"> + <UniqueIdentifier>{6a99fb4f-1595-4387-a229-fb983e10ee1d}</UniqueIdentifier> + </Filter> + <Filter Include="Header Files"> + <UniqueIdentifier>{3b640415-33d1-4075-9c43-c885e4ab8760}</UniqueIdentifier> + <Extensions>h;hpp;hxx;hm;inl</Extensions> + </Filter> + </ItemGroup> + <ItemGroup> + <ClCompile Include="ldecod\src\biaridecod.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\block.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\cabac.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\context_ini.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="dec_api.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\erc_api.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\erc_do_i.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\erc_do_p.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\errorconcealment.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\fmo.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\header.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\image.c"> + <Filter>Source Files</Filter> + 
</ClCompile> + <ClCompile Include="ldecod\src\intra16x16_pred.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\intra4x4_pred.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\intra8x8_pred.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\intra_chroma_pred.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\ldecod.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\macroblock.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\mb_access.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\mb_prediction.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\mbuffer.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\mc_prediction.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="lcommon\src\memalloc.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="lcommon\src\memcache.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\meminput.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="lcommon\src\mv_prediction.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\nal.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\nalu.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\nalucommon.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\output.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\parset.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="lcommon\src\parsetcommon.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\quant.c"> + <Filter>Source Files</Filter> + 
</ClCompile> + <ClCompile Include="ldecod\src\sei.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\storable_picture.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="lcommon\src\transform.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\transform8x8.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\vlc.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="lcommon\src\win32.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\filter_chroma_horiz.c"> + <Filter>Source Files\deblocking filter</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\filter_chroma_vert.c"> + <Filter>Source Files\deblocking filter</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\filter_luma_horiz.c"> + <Filter>Source Files\deblocking filter</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\filter_luma_vert.c"> + <Filter>Source Files\deblocking filter</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\loopFilter.c"> + <Filter>Source Files\deblocking filter</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\strength_horiz.c"> + <Filter>Source Files\deblocking filter</Filter> + </ClCompile> + <ClCompile Include="ldecod\src\strength_vert.c"> + <Filter>Source Files\deblocking filter</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="ldecod\inc\biaridecod.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\block.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\cabac.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\context_ini.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\contributors.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\ctx_tables.h"> + <Filter>Header Files</Filter> + </ClInclude> + 
<ClInclude Include="dec_api.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\defines.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\elements.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\erc_api.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\erc_do.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\erc_globals.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\errorconcealment.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\fmo.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\frame.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\global.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\header.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\ifunctions.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\image.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\intra16x16_pred.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\intra4x4_pred.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\intra8x8_pred.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\leaky_bucket.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\loopfilter.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\macroblock.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\mb_access.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\mbuffer.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude 
Include="ldecod\inc\mc_prediction.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\memalloc.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\memcache.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\meminput.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\mv_prediction.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\nalu.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\nalucommon.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\optim.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\output.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\parset.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\parsetcommon.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\quant.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\sei.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\transform.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\transform8x8.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\types.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ldecod\inc\vlc.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="lcommon\inc\win32.h"> + <Filter>Header Files</Filter> + </ClInclude> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="ldecod\src\biari.asm"> + <Filter>Source Files</Filter> + </CustomBuild> + <CustomBuild Include="ldecod\src\macroblock.asm"> + <Filter>Source Files</Filter> + </CustomBuild> + <CustomBuild Include="ldecod\src\prediction.asm"> + <Filter>Source Files</Filter> + </CustomBuild> + 
</ItemGroup> +</Project>
\ No newline at end of file |