aboutsummaryrefslogtreecommitdiff
path: root/Src/h264dec
diff options
context:
space:
mode:
Diffstat (limited to 'Src/h264dec')
-rw-r--r--Src/h264dec/dec_api.c393
-rw-r--r--Src/h264dec/dec_api.h25
-rw-r--r--Src/h264dec/jm_vc9.sln19
-rw-r--r--Src/h264dec/lcommon/inc/ctx_tables.h994
-rw-r--r--Src/h264dec/lcommon/inc/enc_statistics.h60
-rw-r--r--Src/h264dec/lcommon/inc/frame.h50
-rw-r--r--Src/h264dec/lcommon/inc/ifunctions.h251
-rw-r--r--Src/h264dec/lcommon/inc/img_io.h28
-rw-r--r--Src/h264dec/lcommon/inc/mb_access.h70
-rw-r--r--Src/h264dec/lcommon/inc/memalloc.h71
-rw-r--r--Src/h264dec/lcommon/inc/memcache.h24
-rw-r--r--Src/h264dec/lcommon/inc/mv_prediction.h19
-rw-r--r--Src/h264dec/lcommon/inc/nalucommon.h64
-rw-r--r--Src/h264dec/lcommon/inc/quant_params.h55
-rw-r--r--Src/h264dec/lcommon/inc/transform.h27
-rw-r--r--Src/h264dec/lcommon/inc/typedefs.h71
-rw-r--r--Src/h264dec/lcommon/inc/types.h204
-rw-r--r--Src/h264dec/lcommon/inc/win32.h92
-rw-r--r--Src/h264dec/lcommon/src/img_io.c327
-rw-r--r--Src/h264dec/lcommon/src/memalloc.c1280
-rw-r--r--Src/h264dec/lcommon/src/memcache.c106
-rw-r--r--Src/h264dec/lcommon/src/mv_prediction.c250
-rw-r--r--Src/h264dec/lcommon/src/parsetcommon.c244
-rw-r--r--Src/h264dec/lcommon/src/transform.c809
-rw-r--r--Src/h264dec/lcommon/src/win32.c67
-rw-r--r--Src/h264dec/ldecod/inc/biaridecod.h157
-rw-r--r--Src/h264dec/ldecod/inc/block.h133
-rw-r--r--Src/h264dec/ldecod/inc/cabac.h64
-rw-r--r--Src/h264dec/ldecod/inc/context_ini.h23
-rw-r--r--Src/h264dec/ldecod/inc/contributors.h223
-rw-r--r--Src/h264dec/ldecod/inc/defines.h273
-rw-r--r--Src/h264dec/ldecod/inc/elements.h112
-rw-r--r--Src/h264dec/ldecod/inc/erc_api.h159
-rw-r--r--Src/h264dec/ldecod/inc/erc_do.h44
-rw-r--r--Src/h264dec/ldecod/inc/erc_globals.h52
-rw-r--r--Src/h264dec/ldecod/inc/errorconcealment.h20
-rw-r--r--Src/h264dec/ldecod/inc/fmo.h30
-rw-r--r--Src/h264dec/ldecod/inc/global.h1230
-rw-r--r--Src/h264dec/ldecod/inc/header.h22
-rw-r--r--Src/h264dec/ldecod/inc/image.h33
-rw-r--r--Src/h264dec/ldecod/inc/intra16x16_pred.h25
-rw-r--r--Src/h264dec/ldecod/inc/intra4x4_pred.h25
-rw-r--r--Src/h264dec/ldecod/inc/intra8x8_pred.h25
-rw-r--r--Src/h264dec/ldecod/inc/leaky_bucket.h26
-rw-r--r--Src/h264dec/ldecod/inc/loopfilter.h23
-rw-r--r--Src/h264dec/ldecod/inc/macroblock.h177
-rw-r--r--Src/h264dec/ldecod/inc/mb_prediction.h33
-rw-r--r--Src/h264dec/ldecod/inc/mbuffer.h235
-rw-r--r--Src/h264dec/ldecod/inc/mc_prediction.h61
-rw-r--r--Src/h264dec/ldecod/inc/meminput.h26
-rw-r--r--Src/h264dec/ldecod/inc/nalu.h28
-rw-r--r--Src/h264dec/ldecod/inc/optim.h46
-rw-r--r--Src/h264dec/ldecod/inc/output.h27
-rw-r--r--Src/h264dec/ldecod/inc/parset.h56
-rw-r--r--Src/h264dec/ldecod/inc/parsetcommon.h202
-rw-r--r--Src/h264dec/ldecod/inc/quant.h169
-rw-r--r--Src/h264dec/ldecod/inc/sei.h100
-rw-r--r--Src/h264dec/ldecod/inc/transform8x8.h24
-rw-r--r--Src/h264dec/ldecod/inc/vlc.h122
-rw-r--r--Src/h264dec/ldecod/src/biari.asm2540
-rw-r--r--Src/h264dec/ldecod/src/biaridecod.c322
-rw-r--r--Src/h264dec/ldecod/src/block.c929
-rw-r--r--Src/h264dec/ldecod/src/cabac.c2123
-rw-r--r--Src/h264dec/ldecod/src/context_ini.c123
-rw-r--r--Src/h264dec/ldecod/src/erc_api.c371
-rw-r--r--Src/h264dec/ldecod/src/erc_do_i.c544
-rw-r--r--Src/h264dec/ldecod/src/erc_do_p.c1742
-rw-r--r--Src/h264dec/ldecod/src/errorconcealment.c138
-rw-r--r--Src/h264dec/ldecod/src/filter_chroma_horiz.c533
-rw-r--r--Src/h264dec/ldecod/src/filter_chroma_vert.c570
-rw-r--r--Src/h264dec/ldecod/src/filter_luma_horiz.c871
-rw-r--r--Src/h264dec/ldecod/src/filter_luma_vert.c554
-rw-r--r--Src/h264dec/ldecod/src/fmo.c552
-rw-r--r--Src/h264dec/ldecod/src/header.c857
-rw-r--r--Src/h264dec/ldecod/src/image.c1699
-rw-r--r--Src/h264dec/ldecod/src/intra16x16_pred.c428
-rw-r--r--Src/h264dec/ldecod/src/intra4x4_pred.c854
-rw-r--r--Src/h264dec/ldecod/src/intra8x8_pred.c1928
-rw-r--r--Src/h264dec/ldecod/src/intra_chroma_pred.c357
-rw-r--r--Src/h264dec/ldecod/src/ldecod.c639
-rw-r--r--Src/h264dec/ldecod/src/loopFilter.c1338
-rw-r--r--Src/h264dec/ldecod/src/macroblock.asm189
-rw-r--r--Src/h264dec/ldecod/src/macroblock.c6475
-rw-r--r--Src/h264dec/ldecod/src/mb_access.c3388
-rw-r--r--Src/h264dec/ldecod/src/mb_prediction.c979
-rw-r--r--Src/h264dec/ldecod/src/mbuffer.c4409
-rw-r--r--Src/h264dec/ldecod/src/mc_prediction.c2420
-rw-r--r--Src/h264dec/ldecod/src/meminput.c134
-rw-r--r--Src/h264dec/ldecod/src/nal.c123
-rw-r--r--Src/h264dec/ldecod/src/nalu.c162
-rw-r--r--Src/h264dec/ldecod/src/nalucommon.c73
-rw-r--r--Src/h264dec/ldecod/src/output.c599
-rw-r--r--Src/h264dec/ldecod/src/parset.c779
-rw-r--r--Src/h264dec/ldecod/src/prediction.asm1626
-rw-r--r--Src/h264dec/ldecod/src/quant.c338
-rw-r--r--Src/h264dec/ldecod/src/sei.c2132
-rw-r--r--Src/h264dec/ldecod/src/storable_picture.c287
-rw-r--r--Src/h264dec/ldecod/src/strength_horiz.c659
-rw-r--r--Src/h264dec/ldecod/src/strength_vert.c594
-rw-r--r--Src/h264dec/ldecod/src/transform8x8.c696
-rw-r--r--Src/h264dec/ldecod/src/vlc.c1769
-rw-r--r--Src/h264dec/ldecod_vc9.vcxproj487
-rw-r--r--Src/h264dec/ldecod_vc9.vcxproj.filters304
103 files changed, 58239 insertions, 0 deletions
diff --git a/Src/h264dec/dec_api.c b/Src/h264dec/dec_api.c
new file mode 100644
index 00000000..aaaeb46a
--- /dev/null
+++ b/Src/h264dec/dec_api.c
@@ -0,0 +1,393 @@
+#include "dec_api.h"
+#include "global.h"
+#include "nalu.h"
+#include "image.h"
+#include "meminput.h"
+#include "output.h"
+#include "fmo.h"
+#include "erc_api.h"
+#include "parset.h"
+#include "memcache.h"
+#include "block.h"
+#include "optim.h"
+#include "mc_prediction.h"
+#include "vlc.h"
+#include <stddef.h> // for offsetof
+
+/* Compile-time assertion. When `condition` is non-zero the array size
+ * evaluates to -1, which does not compile; when it is zero the size is 1 and
+ * the (void)sizeof expression has no run-time effect. */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+/* Table of function pointers; H264_Init() fills it in according to the CPU features it finds. */
+OptimizedFunctions opt;
+
+/* Forward declarations of routines this file calls but does not define. */
+DecoderParams *alloc_decoder();
+void Configure(VideoParameters *p_Vid, InputParameters *p_Inp);
+void malloc_slice(InputParameters *p_Inp, VideoParameters *p_Vid);
+void init (VideoParameters *p_Vid);
+void free_slice (Slice *currSlice);
+void free_img( VideoParameters *p_Vid);
+
+/* CPU feature flags written by H264_Init(). On 32-bit x86 each one holds the
+ * masked CPUID bit (non-zero when the feature is present), not a normalised 0/1. */
+int sse2_flag = 0, mmx_flag=0, sse_flag=0, sse3_flag=0, sse4_1_flag=0;
+/* One-time library initialisation.
+ *
+ * Detects CPU features (CPUID on 32-bit x86; SSE2 is simply assumed on x64),
+ * checks at compile time that the structure layouts match the offsets that
+ * biari.asm relies on, and fills the global `opt` function table.
+ *
+ * Returns 1 when a function set was selected, 0 when the CPU offers neither
+ * SSE2 nor SSE+MMX (in which case `opt` is left untouched). */
+int H264_Init()
+{
+ int flags_edx, flags_ecx;
+ #ifdef H264_IPP
+ ippStaticInit();
+ #endif
+
+#ifdef _M_IX86
+ /* CPUID with EAX=1: the feature bits come back in EDX and ECX. */
+ _asm {
+ mov eax, 1
+ cpuid
+ mov flags_edx, edx
+ mov flags_ecx, ecx
+ }
+ mmx_flag = flags_edx & 0x00800000; // EDX bit 23
+ sse_flag = flags_edx & 0x02000000; // EDX bit 25
+ sse2_flag = flags_edx & 0x04000000; // EDX bit 26
+ sse3_flag = flags_ecx & 0x00000001; // ECX bit 0
+ sse4_1_flag= flags_ecx & (1 << 19); // ECX bit 19
+
+#elif defined(_M_X64)
+ /* No probing on x64: SSE2 is taken for granted, the other flags stay 0. */
+ sse2_flag = 1;
+#endif
+
+#ifdef _M_IX86
+ /* if you get any compile errors here, you need to change biari.asm */
+ /* Each line fails to compile when the field is not at the stated byte offset (32-bit x86 builds only). */
+ BUILD_BUG_ON(offsetof(TextureInfoContexts, map_contexts) != 436);
+ BUILD_BUG_ON(offsetof(TextureInfoContexts, last_contexts) != 3252);
+ BUILD_BUG_ON(offsetof(TextureInfoContexts, one_contexts) != 6068);
+ BUILD_BUG_ON(offsetof(TextureInfoContexts, abs_contexts) != 6508);
+
+ BUILD_BUG_ON(offsetof(Macroblock, p_Slice) != 0);
+ BUILD_BUG_ON(offsetof(Macroblock, p_Vid) != 4);
+ BUILD_BUG_ON(offsetof(Macroblock, qp) != 60);
+ BUILD_BUG_ON(offsetof(Macroblock, qpc) != 64);
+ BUILD_BUG_ON(offsetof(Macroblock, qp_scaled) != 72);
+ BUILD_BUG_ON(offsetof(Macroblock, cbp_blk) != 248);
+ BUILD_BUG_ON(offsetof(Macroblock, mb_field) != 344);
+ BUILD_BUG_ON(offsetof(Macroblock, read_and_store_CBP_block_bit) != 400);
+
+ BUILD_BUG_ON(offsetof(Slice, tex_ctx) != 100);
+ BUILD_BUG_ON(offsetof(Slice, mb_rec) != 1696);
+ BUILD_BUG_ON(offsetof(Slice, mb_pred) != 928);
+ BUILD_BUG_ON(offsetof(Slice, coeff) != 15632);
+ BUILD_BUG_ON(offsetof(Slice, coeff_ctr) != 15760);
+ BUILD_BUG_ON(offsetof(Slice, pos) != 15764);
+ BUILD_BUG_ON(offsetof(Slice, cof) != 2464);
+ BUILD_BUG_ON(offsetof(Slice, last_dquant) != 88);
+ BUILD_BUG_ON(offsetof(Slice, mot_ctx) != 96);
+ BUILD_BUG_ON(offsetof(Slice, slice_type) != 64);
+
+
+ BUILD_BUG_ON(offsetof(StorablePicture, structure) != 0);
+ BUILD_BUG_ON(offsetof(StorablePicture, chroma_qp_offset) != 158688);
+ BUILD_BUG_ON(offsetof(StorablePicture, motion) != 158524);
+ BUILD_BUG_ON(offsetof(StorablePicture, plane_images) != 158512);
+ BUILD_BUG_ON(offsetof(StorablePicture, imgY) != 158512);
+
+
+ BUILD_BUG_ON(offsetof(VideoParameters, structure) != 697200);
+ BUILD_BUG_ON(offsetof(VideoParameters, bitdepth_chroma_qp_scale) != 697456);
+ BUILD_BUG_ON(offsetof(VideoParameters, dec_picture) != 698192);
+
+ BUILD_BUG_ON(offsetof(DecodingEnvironment, Dcodestrm_len) != 16);
+ BUILD_BUG_ON(offsetof(DecodingEnvironment, Dcodestrm) != 12);
+ BUILD_BUG_ON(offsetof(DecodingEnvironment, DbitsLeft) != 8);
+ BUILD_BUG_ON(offsetof(DecodingEnvironment, Dvalue) != 4);
+ BUILD_BUG_ON(offsetof(DecodingEnvironment, Drange) != 0);
+
+ BUILD_BUG_ON(sizeof(BiContextType) != 4);
+ BUILD_BUG_ON(offsetof(BiContextType, state) != 0);
+ BUILD_BUG_ON(offsetof(BiContextType, MPS) != 2);
+
+ BUILD_BUG_ON(offsetof(OptimizedFunctions, copy_image_data_16x16_stride) != 32);
+#endif
+
+ /* Preferred set: the *_sse2 implementations. */
+ if (sse2_flag)
+ {
+ //opt.itrans4x4 = itrans4x4_mmx;
+ opt.itrans8x8 = itrans8x8_sse2;
+ opt.weighted_mc_prediction16x16 = weighted_mc_prediction16x16_sse2;
+ opt.weighted_mc_prediction16x8 = weighted_mc_prediction16x8_sse2;
+ opt.weighted_mc_prediction8x8 = weighted_mc_prediction8x8_sse2;
+
+ opt.weighted_bi_prediction16x16 = weighted_bi_prediction16x16_sse2;
+ opt.weighted_bi_prediction16x8 = weighted_bi_prediction16x8_sse2;
+ opt.weighted_bi_prediction8x8 = weighted_bi_prediction8x8_sse2;
+
+ opt.bi_prediction8x8 = bi_prediction8x8_sse2;
+ opt.copy_image_data_16x16_stride = copy_image_data_16x16_stride_sse;
+ opt.code_from_bitstream_2d_5_4 = code_from_bitstream_2d_5_4_sse2;
+ opt.code_from_bitstream_2d_17_4 = code_from_bitstream_2d_17_4_sse2;
+ opt.code_from_bitstream_2d_16_1 = code_from_bitstream_2d_16_1_sse2;
+ }
+ /* Fallback set for SSE+MMX without SSE2: the *_ipp and *_c implementations. */
+ else if (sse_flag && mmx_flag)
+ {
+ //opt.itrans4x4 = itrans4x4_mmx;
+ opt.itrans8x8 = itrans8x8_c;//itrans8x8_mmx;
+
+ opt.weighted_mc_prediction16x16 = weighted_mc_prediction16x16_ipp;
+ opt.weighted_mc_prediction16x8 = weighted_mc_prediction16x8_ipp;
+ opt.weighted_mc_prediction8x8 = weighted_mc_prediction8x8_ipp;
+
+ opt.weighted_bi_prediction16x16 = weighted_bi_prediction16x16_ipp;
+ opt.weighted_bi_prediction16x8 = weighted_bi_prediction16x8_ipp;
+ opt.weighted_bi_prediction8x8 = weighted_bi_prediction8x8_ipp;
+
+ opt.bi_prediction8x8 = bi_prediction8x8_ipp;
+ opt.copy_image_data_16x16_stride = copy_image_data_16x16_stride_sse;
+ opt.code_from_bitstream_2d_5_4 = code_from_bitstream_2d_5_4_c;
+ opt.code_from_bitstream_2d_17_4 = code_from_bitstream_2d_17_4_c;
+ opt.code_from_bitstream_2d_16_1 = code_from_bitstream_2d_16_1_c;
+ }
+ /* No usable instruction set was detected: report failure, `opt` is not filled in. */
+ else
+ return 0;
+
+ return 1;
+}
+
+/* Allocates a decoder instance and runs its one-time setup steps.
+ * Returns the new handle, or the null result of alloc_decoder() when the
+ * allocation failed (no setup is attempted in that case). */
+h264_decoder_t H264_CreateDecoder()
+{
+  DecoderParams *dec = alloc_decoder();
+  VideoParameters *vid;
+  InputParameters *inp;
+
+  if (!dec)
+    return dec;
+
+  vid = dec->p_Vid;
+  inp = dec->p_Inp;
+
+  Configure(vid, inp);
+  inp->intra_profile_deblocking = 1;
+
+  initBitsFile(vid);
+
+  malloc_slice(inp, vid);
+  init_old_slice(vid->old_slice);
+
+  init(vid);
+
+  init_out_buffer(vid);
+
+  /* deliberately impossible value as a debugging aid; the real one comes from the slice header */
+  vid->current_mb_nr = -4711;
+
+  return dec;
+}
+
+/* Tears down a decoder created by H264_CreateDecoder() and releases the
+ * handle itself. A null handle is accepted and ignored. The order of the
+ * teardown calls below is significant and must not be changed. */
+void H264_DestroyDecoder(h264_decoder_t d)
+{
+  DecoderParams *dec = (DecoderParams *)d;
+  VideoParameters *vid;
+  int cache_idx;
+
+  if (!dec)
+    return;
+
+  vid = dec->p_Vid;
+
+  free_slice(vid->currentSlice);
+  FmoFinit(vid);
+
+  free_global_buffers(vid);
+  flush_dpb(vid);
+
+#if (PAIR_FIELDS_IN_OUTPUT)
+  flush_pending_output(vid);
+#endif
+
+  out_storable_pictures_destroy(vid);
+
+  ercClose(vid, vid->erc_errorVar);
+
+  CleanUpPPS(vid);
+  free_dpb(vid);
+  uninit_out_buffer(vid);
+
+  /* both image caches first, then the motion cache */
+  for (cache_idx = 0; cache_idx < 2; cache_idx++)
+    image_cache_flush(&vid->image_cache[cache_idx]);
+  motion_cache_flush(&vid->motion_cache);
+
+  FreeNALU(vid->nalu);
+  free(dec->p_Inp);
+  free_img(vid);
+  free(dec);
+}
+
+/* Hands one buffer of compressed data to the decoder and runs
+ * decode_one_frame() on it.
+ *
+ * d          decoder handle; a null or incompletely set-up handle is ignored
+ * buffer     input data; only the pointer is stored, the bytes are not copied
+ * bufferlen  number of bytes available at `buffer`
+ * time_code  value passed through to decode_one_frame()
+ *
+ * The function reports nothing to the caller: the result of
+ * decode_one_frame() is dropped, and a structured exception raised while
+ * decoding is swallowed so that bad input cannot take the host down. */
+void H264_DecodeFrame(h264_decoder_t d, const void *buffer, size_t bufferlen, uint64_t time_code)
+{
+  DecoderParams *decoder = (DecoderParams *)d;
+  memory_input_t *mem_input;
+
+  /* same handle validation as the other entry points of this file */
+  if (!decoder || !decoder->p_Vid || !decoder->p_Vid->mem_input)
+    return;
+
+  mem_input = decoder->p_Vid->mem_input;
+  mem_input->user_buffer = buffer;
+  mem_input->user_buffer_size = bufferlen;
+  mem_input->user_buffer_read = 0;
+
+  __try
+  {
+    /* the API returns void, so the decode status has nowhere to go */
+    (void)decode_one_frame(decoder->p_Vid, time_code);
+  }
+  __except(EXCEPTION_EXECUTE_HANDLER)
+  {
+    /* deliberate best effort: a fault while decoding just abandons this call */
+  }
+#ifdef _M_IX86
+  /* leave the MMX state empty so the caller can use x87 floating point again */
+  _mm_empty();
+#endif
+}
+
+/* Fetches a decoded picture through out_storable_picture_get().
+ *
+ * d    decoder handle; a null handle (or one without video state) is ignored
+ * pic  receives the picture; a null pointer makes the call a no-op
+ *
+ * NOTE(review): H264_Flush() pre-sets its picture pointer to 0 before calling
+ * out_storable_picture_get(), which suggests *pic is left untouched when no
+ * picture is queued - callers should initialise *pic themselves; confirm. */
+void H264_GetPicture(h264_decoder_t d, StorablePicture **pic)
+{
+  DecoderParams *decoder = (DecoderParams *)d;
+
+  /* validate the handle as well, like H264_FreePicture() does */
+  if (decoder && decoder->p_Vid && pic)
+  {
+    out_storable_picture_get(decoder->p_Vid, pic);
+  }
+}
+
+/* Converts the VUI aspect-ratio information into a width/height ratio.
+ *
+ * Returns 1.0 (square samples) when no aspect-ratio information is present,
+ * when aspect_ratio_idc is undefined or square, and when the resulting width
+ * or height is zero. For the predefined idc values the fixed ratio is
+ * returned; for VUI_EXTENDED_SAR and any other idc value the explicit
+ * sar_width / sar_height pair is used.
+ *
+ * The zero check matters: a stream may signal an extended SAR with a zero
+ * component, and dividing by it would hand inf/NaN (or a 0.0 ratio) to the
+ * caller. */
+static double GetAspectRatio(const vui_seq_parameters_t *vui)
+{
+  int aspect_ratio_width = 1, aspect_ratio_height = 1;
+
+  if (vui->aspect_ratio_info_present_flag)
+  {
+    switch (vui->aspect_ratio_idc)
+    {
+    case VUI_AR_UNDEFINED:
+    case VUI_AR_SQUARE:
+      aspect_ratio_width = 1;   aspect_ratio_height = 1;  break;
+    case VUI_AR_12_11:
+      aspect_ratio_width = 12;  aspect_ratio_height = 11; break;
+    case VUI_AR_10_11:
+      aspect_ratio_width = 10;  aspect_ratio_height = 11; break;
+    case VUI_AR_16_11:
+      aspect_ratio_width = 16;  aspect_ratio_height = 11; break;
+    case VUI_AR_40_33:
+      aspect_ratio_width = 40;  aspect_ratio_height = 33; break;
+    case VUI_AR_24_11:
+      aspect_ratio_width = 24;  aspect_ratio_height = 11; break;
+    case VUI_AR_20_11:
+      aspect_ratio_width = 20;  aspect_ratio_height = 11; break;
+    case VUI_AR_32_11:
+      aspect_ratio_width = 32;  aspect_ratio_height = 11; break;
+    case VUI_AR_80_33:
+      aspect_ratio_width = 80;  aspect_ratio_height = 33; break;
+    case VUI_AR_18_11:
+      aspect_ratio_width = 18;  aspect_ratio_height = 11; break;
+    case VUI_AR_15_11:
+      aspect_ratio_width = 15;  aspect_ratio_height = 11; break;
+    case VUI_AR_64_33:
+      aspect_ratio_width = 64;  aspect_ratio_height = 33; break;
+    case VUI_AR_160_99:
+      aspect_ratio_width = 160; aspect_ratio_height = 99; break;
+    case VUI_AR_4_3:
+      aspect_ratio_width = 4;   aspect_ratio_height = 3;  break;
+    case VUI_AR_3_2:
+      aspect_ratio_width = 3;   aspect_ratio_height = 2;  break;
+    case VUI_AR_2_1:
+      aspect_ratio_width = 2;   aspect_ratio_height = 1;  break;
+    case VUI_EXTENDED_SAR:
+    default:
+      aspect_ratio_width = vui->sar_width;
+      aspect_ratio_height = vui->sar_height;
+      break;
+    }
+  }
+
+  /* a zero (or otherwise unusable) component means "unspecified": fall back to square samples */
+  if (aspect_ratio_width <= 0 || aspect_ratio_height <= 0)
+    return 1.0;
+
+  return (double)aspect_ratio_width / (double)aspect_ratio_height;
+}
+
+/* Returns the decoder's output frame format and, when available, the sample
+ * aspect ratio.
+ *
+ * d             decoder handle
+ * aspect_ratio  optional; written only when an active SPS exists, otherwise
+ *               left untouched (callers should pre-initialise it)
+ *
+ * Returns a pointer into the decoder's own input parameters, or NULL when the
+ * handle is null or has no input parameters. */
+const FrameFormat *H264_GetOutputFormat(h264_decoder_t d, double *aspect_ratio)
+{
+  DecoderParams *decoder = (DecoderParams *)d;
+
+  if (!decoder || !decoder->p_Inp)
+    return NULL;
+
+  /* the aspect ratio is optional output: skip it when the caller passed no
+   * destination or the video state / active SPS is not there yet */
+  if (aspect_ratio && decoder->p_Vid && decoder->p_Vid->active_sps)
+    *aspect_ratio = GetAspectRatio(&decoder->p_Vid->active_sps->vui_seq_parameters);
+
+  return &decoder->p_Inp->output;
+}
+
+/* Discards all decoder state that depends on previously decoded data.
+ *
+ * Steps, in order: finish the picture currently in progress via
+ * exit_picture(), reset the frame-number and picture-order-count tracking,
+ * flush the decoded picture buffer, free every picture still waiting in the
+ * output queue, and mark the memory input as resetting.
+ *
+ * A null handle, or one without video state, is ignored. */
+void H264_Flush(h264_decoder_t d)
+{
+  DecoderParams *decoder = (DecoderParams *)d;
+  VideoParameters *p_Vid;
+  StorablePicture *pic;
+
+  if (!decoder || !decoder->p_Vid)
+    return;
+
+  p_Vid = decoder->p_Vid;
+
+  exit_picture(p_Vid, &p_Vid->dec_picture);
+
+  p_Vid->frame_num = 0;
+  p_Vid->pre_frame_num = INT_MIN;
+  p_Vid->PreviousFrameNum = 0;
+  p_Vid->PreviousFrameNumOffset = 0;
+  p_Vid->PrevPicOrderCntLsb = 0;
+  p_Vid->PrevPicOrderCntMsb = 0;
+  flush_dpb(p_Vid);
+
+  /* drain the output queue; pic is cleared first so that "nothing returned"
+   * can be told apart from a stale pointer */
+  do
+  {
+    pic = 0;
+    out_storable_picture_get(p_Vid, &pic);
+    if (pic)
+      free_storable_picture(p_Vid, pic);
+  } while (pic);
+
+  /* H264_HurryUp() treats mem_input as optional, so do the same here */
+  if (p_Vid->mem_input)
+    p_Vid->mem_input->resetting = 1;
+}
+
+/* Releases a picture through free_storable_picture().
+ * Does nothing when the handle, its video state or the picture is null. */
+void H264_FreePicture(h264_decoder_t d, StorablePicture *p)
+{
+  DecoderParams *dec = (DecoderParams *)d;
+
+  if (!dec || !dec->p_Vid || !p)
+    return;
+
+  free_storable_picture(dec->p_Vid, p);
+}
+
+/* Signals that no more input will follow: finishes the picture in progress
+ * when there is one, otherwise flushes the decoded picture buffer.
+ * A null handle, or one without video state, is ignored. */
+void H264_EndOfStream(h264_decoder_t d)
+{
+  DecoderParams *dec = (DecoderParams *)d;
+  VideoParameters *vid;
+
+  if (!dec || !dec->p_Vid)
+    return;
+
+  vid = dec->p_Vid;
+
+  if (!vid->dec_picture)
+  {
+    flush_dpb(vid);
+    return;
+  }
+
+  exit_picture(vid, &vid->dec_picture);
+}
+
+/* Stores `state` in the memory input's skip_b_frames field.
+ * Does nothing when the handle, its video state or its memory input is null. */
+void H264_HurryUp(h264_decoder_t d, int state)
+{
+  DecoderParams *dec = (DecoderParams *)d;
+  memory_input_t *input;
+
+  if (!dec || !dec->p_Vid)
+    return;
+
+  input = dec->p_Vid->mem_input;
+  if (!input)
+    return;
+
+  input->skip_b_frames = state;
+}
\ No newline at end of file
diff --git a/Src/h264dec/dec_api.h b/Src/h264dec/dec_api.h
new file mode 100644
index 00000000..41811666
--- /dev/null
+++ b/Src/h264dec/dec_api.h
@@ -0,0 +1,25 @@
+#pragma once
+#include <bfc/platform/types.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+ #include "ldecod/inc/mbuffer.h"
+#include "lcommon/inc/frame.h"
+
+/* Opaque decoder handle, obtained from H264_CreateDecoder(). */
+typedef void *h264_decoder_t;
+
+/* Initializes the library. Currently just does a CPU feature check (sse2, etc)
+ * and selects the matching optimized routines. Returns 1 on success, 0 when
+ * the CPU offers neither SSE2 nor SSE+MMX. */
+int H264_Init(void);
+
+/* Creates a decoder. Returns a null handle when allocation fails. */
+h264_decoder_t H264_CreateDecoder(void);
+
+/* Destroys a decoder and frees the handle. A null handle is ignored. */
+void H264_DestroyDecoder(h264_decoder_t decoder);
+
+/* Decodes from `buffer` (`bufferlen` bytes). The buffer pointer is stored, not
+ * copied, for the duration of the call. No status is reported. */
+void H264_DecodeFrame(h264_decoder_t decoder, const void *buffer, size_t bufferlen, uint64_t time_code);
+
+/* Fetches a decoded picture into *pic. Does nothing when `pic` is null. */
+void H264_GetPicture(h264_decoder_t decoder, StorablePicture **pic);
+
+/* Releases a picture. Null arguments are ignored. */
+void H264_FreePicture(h264_decoder_t decoder, StorablePicture *pic);
+
+/* Drops decoder state: finishes the current picture, resets frame-number and
+ * picture-order-count tracking, flushes the decoded picture buffer and frees
+ * all pictures still queued for output. */
+void H264_Flush(h264_decoder_t decoder);
+
+/* End of input: finishes the picture in progress if there is one, otherwise
+ * flushes the decoded picture buffer. */
+void H264_EndOfStream(h264_decoder_t decoder);
+
+/* Stores `state` as the decoder input's skip_b_frames setting. */
+void H264_HurryUp(h264_decoder_t decoder, int state);
+
+/* Returns the output frame format (owned by the decoder), or a null pointer
+ * when the handle is unusable. *aspect_ratio is written only once an active
+ * SPS exists. */
+const FrameFormat *H264_GetOutputFormat(h264_decoder_t decoder, double *aspect_ratio);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Src/h264dec/jm_vc9.sln b/Src/h264dec/jm_vc9.sln
new file mode 100644
index 00000000..9d057c83
--- /dev/null
+++ b/Src/h264dec/jm_vc9.sln
@@ -0,0 +1,19 @@
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual Studio 2008
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ldecod", "ldecod_vc9.vcproj", "{5499B067-CF32-4141-A757-E0A29866994A}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {5499B067-CF32-4141-A757-E0A29866994A}.Debug|Win32.ActiveCfg = Debug|Win32
+ {5499B067-CF32-4141-A757-E0A29866994A}.Debug|Win32.Build.0 = Debug|Win32
+ {5499B067-CF32-4141-A757-E0A29866994A}.Release|Win32.ActiveCfg = Release|Win32
+ {5499B067-CF32-4141-A757-E0A29866994A}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/Src/h264dec/lcommon/inc/ctx_tables.h b/Src/h264dec/lcommon/inc/ctx_tables.h
new file mode 100644
index 00000000..28d622f3
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/ctx_tables.h
@@ -0,0 +1,994 @@
+
+/*!
+ *************************************************************************************
+ * \file ctx_tables.h
+ *
+ * \brief
+ * CABAC context initialization tables
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Detlev Marpe <marpe@hhi.de>
+ * - Heiko Schwarz <hschwarz@hhi.de>
+ **************************************************************************************
+ */
+
+/* Filler pairs for table slots that carry no real initialization value.
+ * NOTE(review): how the consumer treats {0,64} versus {0,63} is not visible in this header - confirm in context_ini.c. */
+#define CTX_UNUSED {0,64}
+#define CTX_UNDEF {0,63}
+
+#ifdef CONTEXT_INI_C
+
+
+/* Number of models per table family; these match the leading dimension of the
+ * INIT_*_I (1) and INIT_*_P (3) tables in this header. */
+#define NUM_CTX_MODELS_I 1
+#define NUM_CTX_MODELS_P 3
+
+
+static const char INIT_MB_TYPE_I[1][3][11][2] =
+{
+ //----- model 0 -----
+ {
+ { { 20, -15} , { 2, 54} , { 3, 74} , CTX_UNUSED , { -28, 127} , { -23, 104} , { -6, 53} , { -1, 54} , { 7, 51} , CTX_UNUSED , CTX_UNUSED },
+ { { 20, -15} , { 2, 54} , { 3, 74} , { 20, -15} , { 2, 54} , { 3, 74} , { -28, 127} , { -23, 104} , { -6, 53} , { -1, 54} , { 7, 51} }, // SI (unused at the moment)
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+static const char INIT_MB_TYPE_P[3][3][11][2] =
+{
+ //----- model 0 -----
+ {
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 23, 33} , { 23, 2} , { 21, 0} , CTX_UNUSED , { 1, 9} , { 0, 49} , { -37, 118} , { 5, 57} , { -13, 78} , { -11, 65} , { 1, 62} },
+ { { 26, 67} , { 16, 90} , { 9, 104} , CTX_UNUSED , { -46, 127} , { -20, 104} , { 1, 67} , { 18, 64} , { 9, 43} , { 29, 0} , CTX_UNUSED }
+ },
+ //----- model 1 -----
+ {
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 22, 25} , { 34, 0} , { 16, 0} , CTX_UNUSED , { -2, 9} , { 4, 41} , { -29, 118} , { 2, 65} , { -6, 71} , { -13, 79} , { 5, 52} },
+ { { 57, 2} , { 41, 36} , { 26, 69} , CTX_UNUSED , { -45, 127} , { -15, 101} , { -4, 76} , { 26, 34} , { 19, 22} , { 40, 0} , CTX_UNUSED }
+ },
+ //----- model 2 -----
+ {
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 29, 16} , { 25, 0} , { 14, 0} , CTX_UNUSED , { -10, 51} , { -3, 62} , { -27, 99} , { 26, 16} , { -4, 85} , { -24, 102} , { 5, 57} },
+ { { 54, 0} , { 37, 42} , { 12, 97} , CTX_UNUSED , { -32, 127} , { -22, 117} , { -2, 74} , { 20, 40} , { 20, 10} , { 29, 0} , CTX_UNUSED }
+ }
+};
+
+static const char INIT_B8_TYPE_I[1][2][9][2] =
+{
+ //----- model 0 -----
+ {
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_B8_TYPE_P[3][2][9][2] =
+{
+ //----- model 0 -----
+ {
+ { CTX_UNUSED , { 12, 49} , CTX_UNUSED , { -4, 73} , { 17, 50} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -6, 86} , { -17, 95} , { -6, 61} , { 9, 45} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 1 -----
+ {
+ { CTX_UNUSED , { 9, 50} , CTX_UNUSED , { -3, 70} , { 10, 54} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 6, 69} , { -13, 90} , { 0, 52} , { 8, 43} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 2 -----
+ {
+ { CTX_UNUSED , { 6, 57} , CTX_UNUSED , { -17, 73} , { 14, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -6, 93} , { -14, 88} , { -6, 44} , { 4, 55} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_MV_RES_I[1][2][10][2] =
+{
+ //----- model 0 -----
+ {
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_MV_RES_P[3][2][10][2] =
+{
+ //----- model 0 -----
+ {
+ { { -3, 69} , CTX_UNUSED , { -6, 81} , { -11, 96} , CTX_UNUSED , { 0, 58} , CTX_UNUSED , { -3, 76} , { -10, 94} , CTX_UNUSED },
+ { { 6, 55} , { 7, 67} , { -5, 86} , { 2, 88} , CTX_UNUSED , { 5, 54} , { 4, 69} , { -3, 81} , { 0, 88} , CTX_UNUSED }
+ },
+ //----- model 1 -----
+ {
+ { { -2, 69} , CTX_UNUSED , { -5, 82} , { -10, 96} , CTX_UNUSED , { 1, 56} , CTX_UNUSED , { -3, 74} , { -6, 85} , CTX_UNUSED },
+ { { 2, 59} , { 2, 75} , { -3, 87} , { -3, 100} , CTX_UNUSED , { 0, 59} , { -3, 81} , { -7, 86} , { -5, 95} , CTX_UNUSED }
+ },
+ //----- model 2 -----
+ {
+ { { -11, 89} , CTX_UNUSED , { -15, 103} , { -21, 116} , CTX_UNUSED , { 1, 63} , CTX_UNUSED , { -5, 85} , { -13, 106} , CTX_UNUSED },
+ { { 19, 57} , { 20, 58} , { 4, 84} , { 6, 96} , CTX_UNUSED , { 5, 63} , { 6, 75} , { -3, 90} , { -1, 101} , CTX_UNUSED }
+ }
+};
+
+static const char INIT_REF_NO_I[1][2][6][2] =
+{
+ //----- model 0 -----
+ {
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_REF_NO_P[3][2][6][2] =
+{
+ //----- model 0 -----
+ {
+ { { -7, 67} , { -5, 74} , { -4, 74} , { -5, 80} , { -7, 72} , { 1, 58} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 1 -----
+ {
+ { { -1, 66} , { -1, 77} , { 1, 70} , { -2, 86} , { -5, 72} , { 0, 61} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 2 -----
+ {
+ { { 3, 55} , { -4, 79} , { -2, 75} , { -12, 97} , { -7, 50} , { 1, 60} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+
+// Transform-size context initialization, "_I" family: 1 model x 1 row x 3 contexts x 2 values.
+// NOTE(review): each pair is presumably the (m, n) parameter pair of the CABAC context initialization - confirm in context_ini.c.
+static const char INIT_TRANSFORM_SIZE_I[1][1][3][2]=
+{
+ //----- model 0 -----
+ {
+ { { 31, 21} , { 31, 31} , { 25, 50} },
+// { { 0, 41} , { 0, 63} , { 0, 63} },
+ }
+};
+
+// Transform-size context initialization, "_P" family: 3 models x 1 row x 3 contexts x 2 values.
+// NOTE(review): each pair is presumably the (m, n) parameter pair of the CABAC context initialization - confirm in context_ini.c.
+static const char INIT_TRANSFORM_SIZE_P[3][1][3][2]=
+{
+ //----- model 0 -----
+ {
+ { { 12, 40} , { 11, 51} , { 14, 59} },
+// { { 0, 41} , { 0, 63} , { 0, 63} },
+ },
+ //----- model 1 -----
+ {
+ { { 25, 32} , { 21, 49} , { 21, 54} },
+// { { 0, 41} , { 0, 63} , { 0, 63} },
+ },
+ //----- model 2 -----
+ {
+ { { 21, 33} , { 19, 50} , { 17, 61} },
+// { { 0, 41} , { 0, 63} , { 0, 63} },
+ }
+};
+
+// Delta-QP context initialization, "_I" family: 1 model x 1 row x 4 contexts x 2 values.
+static const char INIT_DELTA_QP_I[1][1][4][2]=
+{
+ //----- model 0 -----
+ {
+ { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} },
+ }
+};
+// Delta-QP context initialization, "_P" family: 3 models x 1 row x 4 contexts x 2 values.
+// All three models hold the same values, which also equal the "_I" table.
+static const char INIT_DELTA_QP_P[3][1][4][2]=
+{
+ //----- model 0 -----
+ {
+ { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} },
+ },
+ //----- model 1 -----
+ {
+ { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} },
+ },
+ //----- model 2 -----
+ {
+ { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} },
+ }
+};
+
+// MB-AFF context initialization, "_I" family: 1 model x 1 row x 4 slots x 2 values; the last slot is a CTX_UNUSED filler.
+static const char INIT_MB_AFF_I[1][1][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { 0, 11} , { 1, 55} , { 0, 69} , CTX_UNUSED }
+ }
+};
+// MB-AFF context initialization, "_P" family: 3 models x 1 row x 4 slots x 2 values; the last slot of each model is a CTX_UNUSED filler.
+static const char INIT_MB_AFF_P[3][1][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { 0, 45} , { -4, 78} , { -3, 96} , CTX_UNUSED }
+ },
+ //----- model 1 -----
+ {
+ { { 13, 15} , { 7, 51} , { 2, 80} , CTX_UNUSED }
+ },
+ //----- model 2 -----
+ {
+ { { 7, 34} , { -9, 88} , { -20, 127} , CTX_UNUSED }
+ }
+};
+
+// IPR context initialization, "_I" family: 1 model x 1 row x 2 contexts x 2 values.
+// NOTE(review): "IPR" presumably stands for intra prediction mode - confirm against the consumer.
+static const char INIT_IPR_I[1][1][2][2] =
+{
+ //----- model 0 -----
+ {
+ { { 13, 41} , { 3, 62} }
+ }
+};
+
+// IPR context initialization, "_P" family: 3 models x 1 row x 2 contexts x 2 values.
+// All three models hold the same values, which also equal the "_I" table.
+static const char INIT_IPR_P[3][1][2][2] =
+{
+ //----- model 0 -----
+ {
+ { { 13, 41} , { 3, 62} }
+ },
+ //----- model 1 -----
+ {
+ { { 13, 41} , { 3, 62} }
+ },
+ //----- model 2 -----
+ {
+ { { 13, 41} , { 3, 62} }
+ }
+};
+
+// CIPR context initialization, "_I" family: 1 model x 1 row x 4 contexts x 2 values.
+// NOTE(review): "CIPR" presumably stands for chroma intra prediction mode - confirm against the consumer.
+static const char INIT_CIPR_I[1][1][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} }
+ }
+};
+
+// CIPR context initialization, "_P" family: 3 models x 1 row x 4 contexts x 2 values.
+// All three models hold the same values, which also equal the "_I" table.
+static const char INIT_CIPR_P[3][1][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} }
+ },
+ //----- model 1 -----
+ {
+ { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} }
+ },
+ //----- model 2 -----
+ {
+ { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} }
+ }
+};
+
+// CBP context initialization, "_I" family: 1 model x 3 rows x 4 contexts x 2 values.
+// NOTE(review): "CBP" presumably stands for coded block pattern; the meaning of the three rows is not visible here - confirm against the consumer.
+static const char INIT_CBP_I[1][3][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { -17, 127} , { -13, 102} , { 0, 82} , { -7, 74} },
+ { { -21, 107} , { -27, 127} , { -31, 127} , { -24, 127} },
+ { { -18, 95} , { -27, 127} , { -21, 114} , { -30, 127} }
+ }
+};
+
+// CBP context initialization, "_P" family: 3 models x 3 rows x 4 contexts x 2 values.
+// NOTE(review): "CBP" presumably stands for coded block pattern; the meaning of the three rows is not visible here - confirm against the consumer.
+static const char INIT_CBP_P[3][3][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { -27, 126} , { -28, 98} , { -25, 101} , { -23, 67} },
+ { { -28, 82} , { -20, 94} , { -16, 83} , { -22, 110} },
+ { { -21, 91} , { -18, 102} , { -13, 93} , { -29, 127} }
+ },
+ //----- model 1 -----
+ {
+ { { -39, 127} , { -18, 91} , { -17, 96} , { -26, 81} },
+ { { -35, 98} , { -24, 102} , { -23, 97} , { -27, 119} },
+ { { -24, 99} , { -21, 110} , { -18, 102} , { -36, 127} }
+ },
+ //----- model 2 -----
+ {
+ { { -36, 127} , { -17, 91} , { -14, 95} , { -25, 84} },
+ { { -25, 86} , { -12, 89} , { -17, 91} , { -31, 127} },
+ { { -14, 76} , { -18, 103} , { -13, 90} , { -37, 127} }
+ }
+};
+
+static const char INIT_BCBP_I[1][22][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} },
+ { { -12, 63} , { -2, 68} , { -15, 84} , { -13, 104} },
+ { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} },
+ { { -1, 74} , { -6, 97} , { -7, 91} , { -20, 127} },
+ { { -4, 56} , { -5, 82} , { -7, 76} , { -22, 125} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode
+ { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} },
+ { { -12, 63} , { -2, 68} , { -15, 84} , { -13, 104} },
+ { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cr in the 4:4:4 common mode
+ { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} },
+ { { -12, 63} , { -2, 68} , { -15, 84} , { -13, 104} },
+ { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_BCBP_P[3][22][4][2] =
+{
+ //----- model 0 -----
+ {
+ { { -7, 92} , { -5, 89} , { -7, 96} , { -13, 108} },
+ { { -3, 46} , { -1, 65} , { -1, 57} , { -9, 93} },
+ { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} },
+ { { 5, 54} , { 6, 60} , { 6, 59} , { 6, 69} },
+ { { -1, 48} , { 0, 68} , { -4, 69} , { -8, 88} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode
+ { { -7, 92} , { -5, 89} , { -7, 96} , { -13, 108} },
+ { { -3, 46} , { -1, 65} , { -1, 57} , { -9, 93} },
+ { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cr in the 4:4:4 common mode
+ { { -7, 92} , { -5, 89} , { -7, 96} , { -13, 108} },
+ { { -3, 46} , { -1, 65} , { -1, 57} , { -9, 93} },
+ { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 1 -----
+ {
+ { { 0, 80} , { -5, 89} , { -7, 94} , { -4, 92} },
+ { { 0, 39} , { 0, 65} , { -15, 84} , { -35, 127} },
+ { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} },
+ { { 3, 55} , { 7, 56} , { 7, 55} , { 8, 61} },
+ { { -3, 53} , { 0, 68} , { -7, 74} , { -9, 88} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode
+ { { 0, 80} , { -5, 89} , { -7, 94} , { -4, 92} },
+ { { 0, 39} , { 0, 65} , { -15, 84} , { -35, 127} },
+ { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cr in the 4:4:4 common mode
+ { { 0, 80} , { -5, 89} , { -7, 94} , { -4, 92} },
+ { { 0, 39} , { 0, 65} , { -15, 84} , { -35, 127} },
+ { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 2 -----
+ {
+ { { 11, 80} , { 5, 76} , { 2, 84} , { 5, 78} },
+ { { -6, 55} , { 4, 61} , { -14, 83} , { -37, 127} },
+ { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} },
+ { { 0, 65} , { -2, 79} , { 0, 72} , { -4, 92} },
+ { { -6, 56} , { 3, 68} , { -8, 71} , { -13, 98} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode
+ { { 11, 80} , { 5, 76} , { 2, 84} , { 5, 78} },
+ { { -6, 55} , { 4, 61} , { -14, 83} , { -37, 127} },
+ { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cr in the 4:4:4 common mode
+ { { 11, 80} , { 5, 76} , { 2, 84} , { 5, 78} },
+ { { -6, 55} , { 4, 61} , { -14, 83} , { -37, 127} },
+ { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_MAP_I[1][22][15][2] = // [model][row][entry][2]: 1 model, 22 rows of 15 two-value entries; presumably CABAC significance-map context init values for I slices - TODO confirm against the code that reads this table
+{ // NOTE(review): entries hold negative values but plain char signedness is implementation-defined - confirm the build treats char as signed
+ //----- model 0 (the only model in this table) -----
+ { // rows 0-9 (presumably luma / default component - TODO confirm); CTX_UNUSED is a macro defined elsewhere, presumably a placeholder pair for slots with no context
+ { { -7, 93} , { -11, 87} , { -3, 77} , { -5, 71} , { -4, 63} , { -4, 68} , { -12, 84} , { -7, 62} , { -7, 65} , { 8, 61} , { 5, 56} , { -2, 66} , { 1, 64} , { 0, 61} , { -2, 78} },
+ { CTX_UNUSED , { 1, 50} , { 7, 52} , { 10, 35} , { 0, 44} , { 11, 38} , { 1, 45} , { 0, 46} , { 5, 44} , { 31, 17} , { 1, 51} , { 7, 50} , { 28, 19} , { 16, 33} , { 14, 62} },
+ { { -17, 120} , { -20, 112} , { -18, 114} , { -11, 85} , { -15, 92} , { -14, 89} , { -26, 71} , { -15, 81} , { -14, 80} , { 0, 68} , { -14, 70} , { -24, 56} , { -23, 68} , { -24, 50} , { -11, 74} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -13, 108} , { -15, 100} , { -13, 101} , { -13, 91} , { -12, 94} , { -10, 88} , { -16, 84} , { -10, 86} , { -7, 83} , { -13, 87} , { -19, 94} , { 1, 70} , { 0, 72} , { -5, 74} , { 18, 59} },
+ { { -8, 102} , { -15, 100} , { 0, 95} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { -4, 75} , { 2, 72} , { -11, 75} , { -3, 71} , { 15, 46} , { -13, 69} , { 0, 62} , { 0, 65} , { 21, 37} , { -15, 72} , { 9, 57} , { 16, 54} , { 0, 62} , { 12, 72} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode: rows 10-15, which repeat rows 0-5 of this model
+ { { -7, 93} , { -11, 87} , { -3, 77} , { -5, 71} , { -4, 63} , { -4, 68} , { -12, 84} , { -7, 62} , { -7, 65} , { 8, 61} , { 5, 56} , { -2, 66} , { 1, 64} , { 0, 61} , { -2, 78} },
+ { CTX_UNUSED , { 1, 50} , { 7, 52} , { 10, 35} , { 0, 44} , { 11, 38} , { 1, 45} , { 0, 46} , { 5, 44} , { 31, 17} , { 1, 51} , { 7, 50} , { 28, 19} , { 16, 33} , { 14, 62} },
+ { { -17, 120} , { -20, 112} , { -18, 114} , { -11, 85} , { -15, 92} , { -14, 89} , { -26, 71} , { -15, 81} , { -14, 80} , { 0, 68} , { -14, 70} , { -24, 56} , { -23, 68} , { -24, 50} , { -11, 74} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -13, 108} , { -15, 100} , { -13, 101} , { -13, 91} , { -12, 94} , { -10, 88} , { -16, 84} , { -10, 86} , { -7, 83} , { -13, 87} , { -19, 94} , { 1, 70} , { 0, 72} , { -5, 74} , { 18, 59} },
+ // Cr in the 4:4:4 common mode: rows 16-21, which repeat rows 0-5 of this model
+ { { -7, 93} , { -11, 87} , { -3, 77} , { -5, 71} , { -4, 63} , { -4, 68} , { -12, 84} , { -7, 62} , { -7, 65} , { 8, 61} , { 5, 56} , { -2, 66} , { 1, 64} , { 0, 61} , { -2, 78} },
+ { CTX_UNUSED , { 1, 50} , { 7, 52} , { 10, 35} , { 0, 44} , { 11, 38} , { 1, 45} , { 0, 46} , { 5, 44} , { 31, 17} , { 1, 51} , { 7, 50} , { 28, 19} , { 16, 33} , { 14, 62} },
+ { { -17, 120} , { -20, 112} , { -18, 114} , { -11, 85} , { -15, 92} , { -14, 89} , { -26, 71} , { -15, 81} , { -14, 80} , { 0, 68} , { -14, 70} , { -24, 56} , { -23, 68} , { -24, 50} , { -11, 74} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -13, 108} , { -15, 100} , { -13, 101} , { -13, 91} , { -12, 94} , { -10, 88} , { -16, 84} , { -10, 86} , { -7, 83} , { -13, 87} , { -19, 94} , { 1, 70} , { 0, 72} , { -5, 74} , { 18, 59} }
+ }
+};
+
+static const char INIT_MAP_P[3][22][15][2] = // [model][row][entry][2]: 3 models, each 22 rows of 15 two-value entries; presumably CABAC significance-map context init values for P/B slices - TODO confirm against the code that reads this table
+{ // NOTE(review): entries hold negative values but plain char signedness is implementation-defined - confirm the build treats char as signed
+ //----- model 0 (first of three alternative tables; presumably selected by cabac_init_idc - TODO confirm) -----
+ { // rows 0-9 (presumably luma / default component - TODO confirm); CTX_UNUSED is a macro defined elsewhere, presumably a placeholder pair for slots with no context
+ { { -2, 85} , { -6, 78} , { -1, 75} , { -7, 77} , { 2, 54} , { 5, 50} , { -3, 68} , { 1, 50} , { 6, 42} , { -4, 81} , { 1, 63} , { -4, 70} , { 0, 67} , { 2, 57} , { -2, 76} },
+ { CTX_UNUSED , { 11, 35} , { 4, 64} , { 1, 61} , { 11, 35} , { 18, 25} , { 12, 24} , { 13, 29} , { 13, 36} , { -10, 93} , { -7, 73} , { -2, 73} , { 13, 46} , { 9, 49} , { -7, 100} },
+ { { -4, 79} , { -7, 71} , { -5, 69} , { -9, 70} , { -8, 66} , { -10, 68} , { -19, 73} , { -12, 69} , { -16, 70} , { -15, 67} , { -20, 62} , { -19, 70} , { -16, 66} , { -22, 65} , { -20, 63} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 9, 53} , { 2, 53} , { 5, 53} , { -2, 61} , { 0, 56} , { 0, 56} , { -13, 63} , { -5, 60} , { -1, 62} , { 4, 57} , { -6, 69} , { 4, 57} , { 14, 39} , { 4, 51} , { 13, 68} },
+ { { 3, 64} , { 1, 61} , { 9, 63} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 7, 50} , { 16, 39} , { 5, 44} , { 4, 52} , { 11, 48} , { -5, 60} , { -1, 59} , { 0, 59} , { 22, 33} , { 5, 44} , { 14, 43} , { -1, 78} , { 0, 60} , { 9, 69} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode: rows 10-15, which repeat rows 0-5 of this model
+ { { -2, 85} , { -6, 78} , { -1, 75} , { -7, 77} , { 2, 54} , { 5, 50} , { -3, 68} , { 1, 50} , { 6, 42} , { -4, 81} , { 1, 63} , { -4, 70} , { 0, 67} , { 2, 57} , { -2, 76} },
+ { CTX_UNUSED , { 11, 35} , { 4, 64} , { 1, 61} , { 11, 35} , { 18, 25} , { 12, 24} , { 13, 29} , { 13, 36} , { -10, 93} , { -7, 73} , { -2, 73} , { 13, 46} , { 9, 49} , { -7, 100} },
+ { { -4, 79} , { -7, 71} , { -5, 69} , { -9, 70} , { -8, 66} , { -10, 68} , { -19, 73} , { -12, 69} , { -16, 70} , { -15, 67} , { -20, 62} , { -19, 70} , { -16, 66} , { -22, 65} , { -20, 63} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 9, 53} , { 2, 53} , { 5, 53} , { -2, 61} , { 0, 56} , { 0, 56} , { -13, 63} , { -5, 60} , { -1, 62} , { 4, 57} , { -6, 69} , { 4, 57} , { 14, 39} , { 4, 51} , { 13, 68} },
+ // Cr in the 4:4:4 common mode: rows 16-21, which repeat rows 0-5 of this model
+ { { -2, 85} , { -6, 78} , { -1, 75} , { -7, 77} , { 2, 54} , { 5, 50} , { -3, 68} , { 1, 50} , { 6, 42} , { -4, 81} , { 1, 63} , { -4, 70} , { 0, 67} , { 2, 57} , { -2, 76} },
+ { CTX_UNUSED , { 11, 35} , { 4, 64} , { 1, 61} , { 11, 35} , { 18, 25} , { 12, 24} , { 13, 29} , { 13, 36} , { -10, 93} , { -7, 73} , { -2, 73} , { 13, 46} , { 9, 49} , { -7, 100} },
+ { { -4, 79} , { -7, 71} , { -5, 69} , { -9, 70} , { -8, 66} , { -10, 68} , { -19, 73} , { -12, 69} , { -16, 70} , { -15, 67} , { -20, 62} , { -19, 70} , { -16, 66} , { -22, 65} , { -20, 63} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 9, 53} , { 2, 53} , { 5, 53} , { -2, 61} , { 0, 56} , { 0, 56} , { -13, 63} , { -5, 60} , { -1, 62} , { 4, 57} , { -6, 69} , { 4, 57} , { 14, 39} , { 4, 51} , { 13, 68} }
+ },
+ //----- model 1 (second alternative table; same 10 + 6 + 6 row layout as model 0) -----
+ {
+ { { -13, 103} , { -13, 91} , { -9, 89} , { -14, 92} , { -8, 76} , { -12, 87} , { -23, 110} , { -24, 105} , { -10, 78} , { -20, 112} , { -17, 99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} },
+ { CTX_UNUSED , { -4, 66} , { -5, 78} , { -4, 71} , { -8, 72} , { 2, 59} , { -1, 55} , { -7, 70} , { -6, 75} , { -8, 89} , { -34, 119} , { -3, 75} , { 32, 20} , { 30, 22} , { -44, 127} },
+ { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 0, 54} , { -5, 61} , { 0, 58} , { -1, 60} , { -3, 61} , { -8, 67} , { -25, 84} , { -14, 74} , { -5, 65} , { 5, 52} , { 2, 57} , { 0, 61} , { -9, 69} , { -11, 70} , { 18, 55} },
+ { { -4, 71} , { 0, 58} , { 7, 61} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 9, 41} , { 18, 25} , { 9, 32} , { 5, 43} , { 9, 47} , { 0, 44} , { 0, 51} , { 2, 46} , { 19, 38} , { -4, 66} , { 15, 38} , { 12, 42} , { 9, 34} , { 0, 89} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode: rows 10-15, which repeat rows 0-5 of this model
+ { { -13, 103} , { -13, 91} , { -9, 89} , { -14, 92} , { -8, 76} , { -12, 87} , { -23, 110} , { -24, 105} , { -10, 78} , { -20, 112} , { -17, 99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} },
+ { CTX_UNUSED , { -4, 66} , { -5, 78} , { -4, 71} , { -8, 72} , { 2, 59} , { -1, 55} , { -7, 70} , { -6, 75} , { -8, 89} , { -34, 119} , { -3, 75} , { 32, 20} , { 30, 22} , { -44, 127} },
+ { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 0, 54} , { -5, 61} , { 0, 58} , { -1, 60} , { -3, 61} , { -8, 67} , { -25, 84} , { -14, 74} , { -5, 65} , { 5, 52} , { 2, 57} , { 0, 61} , { -9, 69} , { -11, 70} , { 18, 55} },
+ // Cr in the 4:4:4 common mode: rows 16-21, which repeat rows 0-5 of this model
+ { { -13, 103} , { -13, 91} , { -9, 89} , { -14, 92} , { -8, 76} , { -12, 87} , { -23, 110} , { -24, 105} , { -10, 78} , { -20, 112} , { -17, 99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} },
+ { CTX_UNUSED , { -4, 66} , { -5, 78} , { -4, 71} , { -8, 72} , { 2, 59} , { -1, 55} , { -7, 70} , { -6, 75} , { -8, 89} , { -34, 119} , { -3, 75} , { 32, 20} , { 30, 22} , { -44, 127} },
+ { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 0, 54} , { -5, 61} , { 0, 58} , { -1, 60} , { -3, 61} , { -8, 67} , { -25, 84} , { -14, 74} , { -5, 65} , { 5, 52} , { 2, 57} , { 0, 61} , { -9, 69} , { -11, 70} , { 18, 55} }
+ },
+ //----- model 2 (third alternative table; same 10 + 6 + 6 row layout as model 0) -----
+ {
+ { { -4, 86} , { -12, 88} , { -5, 82} , { -3, 72} , { -4, 67} , { -8, 72} , { -16, 89} , { -9, 69} , { -1, 59} , { 5, 66} , { 4, 57} , { -4, 71} , { -2, 71} , { 2, 58} , { -1, 74} },
+ { CTX_UNUSED , { -4, 44} , { -1, 69} , { 0, 62} , { -7, 51} , { -4, 47} , { -6, 42} , { -3, 41} , { -6, 53} , { 8, 76} , { -9, 78} , { -11, 83} , { 9, 52} , { 0, 67} , { -5, 90} },
+ { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 1, 67} , { -15, 72} , { -5, 75} , { -8, 80} , { -21, 83} , { -21, 64} , { -13, 31} , { -25, 64} , { -29, 94} , { 9, 75} , { 17, 63} , { -8, 74} , { -5, 35} , { -2, 27} , { 13, 91} },
+ { { 3, 65} , { -7, 69} , { 8, 77} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { -10, 66} , { 3, 62} , { -3, 68} , { -20, 81} , { 0, 30} , { 1, 7} , { -3, 23} , { -21, 74} , { 16, 66} , { -23, 124} , { 17, 37} , { 44, -18} , { 50, -34} , { -22, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode: rows 10-15, which repeat rows 0-5 of this model
+ { { -4, 86} , { -12, 88} , { -5, 82} , { -3, 72} , { -4, 67} , { -8, 72} , { -16, 89} , { -9, 69} , { -1, 59} , { 5, 66} , { 4, 57} , { -4, 71} , { -2, 71} , { 2, 58} , { -1, 74} },
+ { CTX_UNUSED , { -4, 44} , { -1, 69} , { 0, 62} , { -7, 51} , { -4, 47} , { -6, 42} , { -3, 41} , { -6, 53} , { 8, 76} , { -9, 78} , { -11, 83} , { 9, 52} , { 0, 67} , { -5, 90} },
+ { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 1, 67} , { -15, 72} , { -5, 75} , { -8, 80} , { -21, 83} , { -21, 64} , { -13, 31} , { -25, 64} , { -29, 94} , { 9, 75} , { 17, 63} , { -8, 74} , { -5, 35} , { -2, 27} , { 13, 91} },
+ // Cr in the 4:4:4 common mode: rows 16-21, which repeat rows 0-5 of this model
+ { { -4, 86} , { -12, 88} , { -5, 82} , { -3, 72} , { -4, 67} , { -8, 72} , { -16, 89} , { -9, 69} , { -1, 59} , { 5, 66} , { 4, 57} , { -4, 71} , { -2, 71} , { 2, 58} , { -1, 74} },
+ { CTX_UNUSED , { -4, 44} , { -1, 69} , { 0, 62} , { -7, 51} , { -4, 47} , { -6, 42} , { -3, 41} , { -6, 53} , { 8, 76} , { -9, 78} , { -11, 83} , { 9, 52} , { 0, 67} , { -5, 90} },
+ { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 1, 67} , { -15, 72} , { -5, 75} , { -8, 80} , { -21, 83} , { -21, 64} , { -13, 31} , { -25, 64} , { -29, 94} , { 9, 75} , { 17, 63} , { -8, 74} , { -5, 35} , { -2, 27} , { 13, 91} }
+ }
+};
+
+static const char INIT_LAST_I[1][22][15][2] = // [model][row][entry][2]: 1 model, 22 rows of 15 two-value entries; presumably CABAC last-significant-coefficient context init values for I slices - TODO confirm against the code that reads this table
+{ // NOTE(review): entries hold negative values but plain char signedness is implementation-defined - confirm the build treats char as signed
+ //----- model 0 (the only model in this table) -----
+ { // rows 0-9 (presumably luma / default component - TODO confirm); CTX_UNUSED is a macro defined elsewhere, presumably a placeholder pair for slots with no context
+ { { 24, 0} , { 15, 9} , { 8, 25} , { 13, 18} , { 15, 9} , { 13, 19} , { 10, 37} , { 12, 18} , { 6, 29} , { 20, 33} , { 15, 30} , { 4, 45} , { 1, 58} , { 0, 62} , { 7, 61} },
+ { CTX_UNUSED , { 12, 38} , { 11, 45} , { 15, 39} , { 11, 42} , { 13, 44} , { 16, 45} , { 12, 41} , { 10, 49} , { 30, 34} , { 18, 42} , { 10, 55} , { 17, 51} , { 17, 46} , { 0, 89} },
+ { { 23, -13} , { 26, -13} , { 40, -15} , { 49, -14} , { 44, 3} , { 45, 6} , { 44, 34} , { 33, 54} , { 19, 82} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 26, -19} , { 22, -17} , { 26, -17} , { 30, -25} , { 28, -20} , { 33, -23} , { 37, -27} , { 33, -23} , { 40, -28} , { 38, -17} , { 33, -11} , { 40, -15} , { 41, -6} , { 38, 1} , { 41, 17} },
+ { { 30, -6} , { 27, 3} , { 26, 22} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 37, -16} , { 35, -4} , { 38, -8} , { 38, -3} , { 37, 3} , { 38, 5} , { 42, 0} , { 35, 16} , { 39, 22} , { 14, 48} , { 27, 37} , { 21, 60} , { 12, 68} , { 2, 97} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ // Cb in the 4:4:4 common mode: rows 10-15, which repeat rows 0-5 of this model
+ { { 24, 0} , { 15, 9} , { 8, 25} , { 13, 18} , { 15, 9} , { 13, 19} , { 10, 37} , { 12, 18} , { 6, 29} , { 20, 33} , { 15, 30} , { 4, 45} , { 1, 58} , { 0, 62} , { 7, 61} },
+ { CTX_UNUSED , { 12, 38} , { 11, 45} , { 15, 39} , { 11, 42} , { 13, 44} , { 16, 45} , { 12, 41} , { 10, 49} , { 30, 34} , { 18, 42} , { 10, 55} , { 17, 51} , { 17, 46} , { 0, 89} },
+ { { 23, -13} , { 26, -13} , { 40, -15} , { 49, -14} , { 44, 3} , { 45, 6} , { 44, 34} , { 33, 54} , { 19, 82} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 26, -19} , { 22, -17} , { 26, -17} , { 30, -25} , { 28, -20} , { 33, -23} , { 37, -27} , { 33, -23} , { 40, -28} , { 38, -17} , { 33, -11} , { 40, -15} , { 41, -6} , { 38, 1} , { 41, 17} },
+ // Cr in the 4:4:4 common mode: rows 16-21, which repeat rows 0-5 of this model
+ { { 24, 0} , { 15, 9} , { 8, 25} , { 13, 18} , { 15, 9} , { 13, 19} , { 10, 37} , { 12, 18} , { 6, 29} , { 20, 33} , { 15, 30} , { 4, 45} , { 1, 58} , { 0, 62} , { 7, 61} },
+ { CTX_UNUSED , { 12, 38} , { 11, 45} , { 15, 39} , { 11, 42} , { 13, 44} , { 16, 45} , { 12, 41} , { 10, 49} , { 30, 34} , { 18, 42} , { 10, 55} , { 17, 51} , { 17, 46} , { 0, 89} },
+ { { 23, -13} , { 26, -13} , { 40, -15} , { 49, -14} , { 44, 3} , { 45, 6} , { 44, 34} , { 33, 54} , { 19, 82} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 26, -19} , { 22, -17} , { 26, -17} , { 30, -25} , { 28, -20} , { 33, -23} , { 37, -27} , { 33, -23} , { 40, -28} , { 38, -17} , { 33, -11} , { 40, -15} , { 41, -6} , { 38, 1} , { 41, 17} }
+ }
+};
+
+static const char INIT_LAST_P[3][22][15][2] =
+{
+ //----- model 0 -----
+ {
+ { { 11, 28} , { 2, 40} , { 3, 44} , { 0, 49} , { 0, 46} , { 2, 44} , { 2, 51} , { 0, 47} , { 4, 39} , { 2, 62} , { 6, 46} , { 0, 54} , { 3, 54} , { 2, 58} , { 4, 63} },
+ { CTX_UNUSED , { 6, 51} , { 6, 57} , { 7, 53} , { 6, 52} , { 6, 55} , { 11, 45} , { 14, 36} , { 8, 53} , { -1, 82} , { 7, 55} , { -3, 78} , { 15, 46} , { 22, 31} , { -1, 84} },
+ { { 9, -2} , { 26, -9} , { 33, -9} , { 39, -7} , { 41, -2} , { 45, 3} , { 49, 9} , { 45, 27} , { 36, 59} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 25, 7} , { 30, -7} , { 28, 3} , { 28, 4} , { 32, 0} , { 34, -1} , { 30, 6} , { 30, 6} , { 32, 9} , { 31, 19} , { 26, 27} , { 26, 30} , { 37, 20} , { 28, 34} , { 17, 70} },
+ { { 1, 67} , { 5, 59} , { 9, 67} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 16, 30} , { 18, 32} , { 18, 35} , { 22, 29} , { 24, 31} , { 23, 38} , { 18, 43} , { 20, 41} , { 11, 63} , { 9, 59} , { 9, 64} , { -1, 94} , { -2, 89} , { -9, 108} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { 11, 28} , { 2, 40} , { 3, 44} , { 0, 49} , { 0, 46} , { 2, 44} , { 2, 51} , { 0, 47} , { 4, 39} , { 2, 62} , { 6, 46} , { 0, 54} , { 3, 54} , { 2, 58} , { 4, 63} },
+ { CTX_UNUSED , { 6, 51} , { 6, 57} , { 7, 53} , { 6, 52} , { 6, 55} , { 11, 45} , { 14, 36} , { 8, 53} , { -1, 82} , { 7, 55} , { -3, 78} , { 15, 46} , { 22, 31} , { -1, 84} },
+ { { 9, -2} , { 26, -9} , { 33, -9} , { 39, -7} , { 41, -2} , { 45, 3} , { 49, 9} , { 45, 27} , { 36, 59} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 25, 7} , { 30, -7} , { 28, 3} , { 28, 4} , { 32, 0} , { 34, -1} , { 30, 6} , { 30, 6} , { 32, 9} , { 31, 19} , { 26, 27} , { 26, 30} , { 37, 20} , { 28, 34} , { 17, 70} },
+ //Cr in the 4:4:4 common mode
+ { { 11, 28} , { 2, 40} , { 3, 44} , { 0, 49} , { 0, 46} , { 2, 44} , { 2, 51} , { 0, 47} , { 4, 39} , { 2, 62} , { 6, 46} , { 0, 54} , { 3, 54} , { 2, 58} , { 4, 63} },
+ { CTX_UNUSED , { 6, 51} , { 6, 57} , { 7, 53} , { 6, 52} , { 6, 55} , { 11, 45} , { 14, 36} , { 8, 53} , { -1, 82} , { 7, 55} , { -3, 78} , { 15, 46} , { 22, 31} , { -1, 84} },
+ { { 9, -2} , { 26, -9} , { 33, -9} , { 39, -7} , { 41, -2} , { 45, 3} , { 49, 9} , { 45, 27} , { 36, 59} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 25, 7} , { 30, -7} , { 28, 3} , { 28, 4} , { 32, 0} , { 34, -1} , { 30, 6} , { 30, 6} , { 32, 9} , { 31, 19} , { 26, 27} , { 26, 30} , { 37, 20} , { 28, 34} , { 17, 70} }
+ },
+ //----- model 1 -----
+ {
+ { { 4, 45} , { 10, 28} , { 10, 31} , { 33, -11} , { 52, -43} , { 18, 15} , { 28, 0} , { 35, -22} , { 38, -25} , { 34, 0} , { 39, -18} , { 32, -12} , { 102, -94} , { 0, 0} , { 56, -15} },
+ { CTX_UNUSED , { 33, -4} , { 29, 10} , { 37, -5} , { 51, -29} , { 39, -9} , { 52, -34} , { 69, -58} , { 67, -63} , { 44, -5} , { 32, 7} , { 55, -29} , { 32, 1} , { 0, 0} , { 27, 36} },
+ { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 33, -25} , { 34, -30} , { 36, -28} , { 38, -28} , { 38, -27} , { 34, -18} , { 35, -16} , { 34, -14} , { 32, -8} , { 37, -6} , { 35, 0} , { 30, 10} , { 28, 18} , { 26, 25} , { 29, 41} },
+ { { 0, 75} , { 2, 72} , { 8, 77} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 14, 35} , { 18, 31} , { 17, 35} , { 21, 30} , { 17, 45} , { 20, 42} , { 18, 45} , { 27, 26} , { 16, 54} , { 7, 66} , { 16, 56} , { 11, 73} , { 10, 67} , { -10, 116} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { 4, 45} , { 10, 28} , { 10, 31} , { 33, -11} , { 52, -43} , { 18, 15} , { 28, 0} , { 35, -22} , { 38, -25} , { 34, 0} , { 39, -18} , { 32, -12} , { 102, -94} , { 0, 0} , { 56, -15} },
+ { CTX_UNUSED , { 33, -4} , { 29, 10} , { 37, -5} , { 51, -29} , { 39, -9} , { 52, -34} , { 69, -58} , { 67, -63} , { 44, -5} , { 32, 7} , { 55, -29} , { 32, 1} , { 0, 0} , { 27, 36} },
+ { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 33, -25} , { 34, -30} , { 36, -28} , { 38, -28} , { 38, -27} , { 34, -18} , { 35, -16} , { 34, -14} , { 32, -8} , { 37, -6} , { 35, 0} , { 30, 10} , { 28, 18} , { 26, 25} , { 29, 41} },
+ //Cr in the 4:4:4 common mode
+ { { 4, 45} , { 10, 28} , { 10, 31} , { 33, -11} , { 52, -43} , { 18, 15} , { 28, 0} , { 35, -22} , { 38, -25} , { 34, 0} , { 39, -18} , { 32, -12} , { 102, -94} , { 0, 0} , { 56, -15} },
+ { CTX_UNUSED , { 33, -4} , { 29, 10} , { 37, -5} , { 51, -29} , { 39, -9} , { 52, -34} , { 69, -58} , { 67, -63} , { 44, -5} , { 32, 7} , { 55, -29} , { 32, 1} , { 0, 0} , { 27, 36} },
+ { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 33, -25} , { 34, -30} , { 36, -28} , { 38, -28} , { 38, -27} , { 34, -18} , { 35, -16} , { 34, -14} , { 32, -8} , { 37, -6} , { 35, 0} , { 30, 10} , { 28, 18} , { 26, 25} , { 29, 41} }
+ },
+ //----- model 2 -----
+ {
+ { { 4, 39} , { 0, 42} , { 7, 34} , { 11, 29} , { 8, 31} , { 6, 37} , { 7, 42} , { 3, 40} , { 8, 33} , { 13, 43} , { 13, 36} , { 4, 47} , { 3, 55} , { 2, 58} , { 6, 60} },
+ { CTX_UNUSED , { 8, 44} , { 11, 44} , { 14, 42} , { 7, 48} , { 4, 56} , { 4, 52} , { 13, 37} , { 9, 49} , { 19, 58} , { 10, 48} , { 12, 45} , { 0, 69} , { 20, 33} , { 8, 63} },
+ { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 35, -18} , { 33, -25} , { 28, -3} , { 24, 10} , { 27, 0} , { 34, -14} , { 52, -44} , { 39, -24} , { 19, 17} , { 31, 25} , { 36, 29} , { 24, 33} , { 34, 15} , { 30, 20} , { 22, 73} },
+ { { 20, 34} , { 19, 31} , { 27, 44} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 19, 16} , { 15, 36} , { 15, 36} , { 21, 28} , { 25, 21} , { 30, 20} , { 31, 12} , { 27, 16} , { 24, 42} , { 0, 93} , { 14, 56} , { 15, 57} , { 26, 38} , { -24, 127} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { 4, 39} , { 0, 42} , { 7, 34} , { 11, 29} , { 8, 31} , { 6, 37} , { 7, 42} , { 3, 40} , { 8, 33} , { 13, 43} , { 13, 36} , { 4, 47} , { 3, 55} , { 2, 58} , { 6, 60} },
+ { CTX_UNUSED , { 8, 44} , { 11, 44} , { 14, 42} , { 7, 48} , { 4, 56} , { 4, 52} , { 13, 37} , { 9, 49} , { 19, 58} , { 10, 48} , { 12, 45} , { 0, 69} , { 20, 33} , { 8, 63} },
+ { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 35, -18} , { 33, -25} , { 28, -3} , { 24, 10} , { 27, 0} , { 34, -14} , { 52, -44} , { 39, -24} , { 19, 17} , { 31, 25} , { 36, 29} , { 24, 33} , { 34, 15} , { 30, 20} , { 22, 73} },
+ //Cr in the 4:4:4 common mode
+ { { 4, 39} , { 0, 42} , { 7, 34} , { 11, 29} , { 8, 31} , { 6, 37} , { 7, 42} , { 3, 40} , { 8, 33} , { 13, 43} , { 13, 36} , { 4, 47} , { 3, 55} , { 2, 58} , { 6, 60} },
+ { CTX_UNUSED , { 8, 44} , { 11, 44} , { 14, 42} , { 7, 48} , { 4, 56} , { 4, 52} , { 13, 37} , { 9, 49} , { 19, 58} , { 10, 48} , { 12, 45} , { 0, 69} , { 20, 33} , { 8, 63} },
+ { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 35, -18} , { 33, -25} , { 28, -3} , { 24, 10} , { 27, 0} , { 34, -14} , { 52, -44} , { 39, -24} , { 19, 17} , { 31, 25} , { 36, 29} , { 24, 33} , { 34, 15} , { 30, 20} , { 22, 73} }
+ }
+};
+
+static const char INIT_ONE_I[1][22][5][2] = // indexed [model][row][column][pair element]; this table holds a single model
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: rows 0-9, then rows 10-15 (Cb) and rows 16-21 (Cr), each repeating rows 0-4 plus one CTX_UNUSED row -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { -3, 71} , { -6, 42} , { -5, 50} , { -3, 54} , { -2, 62} },
+ { { -5, 67} , { -5, 27} , { -3, 39} , { -2, 44} , { 0, 46} },
+ { { -3, 75} , { -1, 23} , { 1, 34} , { 1, 43} , { 0, 54} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -12, 92} , { -15, 55} , { -10, 60} , { -6, 62} , { -4, 65} },
+ { { -11, 97} , { -20, 84} , { -11, 79} , { -6, 73} , { -4, 74} },
+ { { -8, 78} , { -5, 33} , { -4, 48} , { -2, 53} , { -3, 62} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { -3, 71} , { -6, 42} , { -5, 50} , { -3, 54} , { -2, 62} },
+ { { -5, 67} , { -5, 27} , { -3, 39} , { -2, 44} , { 0, 46} },
+ { { -3, 75} , { -1, 23} , { 1, 34} , { 1, 43} , { 0, 54} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -12, 92} , { -15, 55} , { -10, 60} , { -6, 62} , { -4, 65} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { -3, 71} , { -6, 42} , { -5, 50} , { -3, 54} , { -2, 62} },
+ { { -5, 67} , { -5, 27} , { -3, 39} , { -2, 44} , { 0, 46} },
+ { { -3, 75} , { -1, 23} , { 1, 34} , { 1, 43} , { 0, 54} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -12, 92} , { -15, 55} , { -10, 60} , { -6, 62} , { -4, 65} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_ONE_P[3][22][5][2] = // indexed [model][row][column][pair element]; three models (0-2) with the same 22-row layout
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: rows 0-9, then rows 10-15 (Cb) and rows 16-21 (Cr), each repeating rows 0-4 plus one CTX_UNUSED row -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { -6, 76} , { -2, 44} , { 0, 45} , { 0, 52} , { -3, 64} },
+ { { -9, 77} , { 3, 24} , { 0, 42} , { 0, 48} , { 0, 55} },
+ { { -6, 66} , { -7, 35} , { -7, 42} , { -8, 45} , { -5, 48} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 1, 58} , { -3, 29} , { -1, 36} , { 1, 38} , { 2, 43} },
+ { { 0, 70} , { -4, 29} , { 5, 31} , { 7, 42} , { 1, 59} },
+ { { 0, 58} , { 8, 5} , { 10, 14} , { 14, 18} , { 13, 27} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { -6, 76} , { -2, 44} , { 0, 45} , { 0, 52} , { -3, 64} },
+ { { -9, 77} , { 3, 24} , { 0, 42} , { 0, 48} , { 0, 55} },
+ { { -6, 66} , { -7, 35} , { -7, 42} , { -8, 45} , { -5, 48} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 1, 58} , { -3, 29} , { -1, 36} , { 1, 38} , { 2, 43} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { -6, 76} , { -2, 44} , { 0, 45} , { 0, 52} , { -3, 64} },
+ { { -9, 77} , { 3, 24} , { 0, 42} , { 0, 48} , { 0, 55} },
+ { { -6, 66} , { -7, 35} , { -7, 42} , { -8, 45} , { -5, 48} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 1, 58} , { -3, 29} , { -1, 36} , { 1, 38} , { 2, 43} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 1 -----
+ {
+ { { -23, 112} , { -15, 71} , { -7, 61} , { 0, 53} , { -5, 66} },
+ { { -21, 101} , { -3, 39} , { -5, 53} , { -7, 61} , { -11, 75} },
+ { { -5, 71} , { 0, 24} , { -1, 36} , { -2, 42} , { -2, 52} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -11, 76} , { -10, 44} , { -10, 52} , { -10, 57} , { -9, 58} },
+ { { 2, 66} , { -9, 34} , { 1, 32} , { 11, 31} , { 5, 52} },
+ { { 3, 52} , { 7, 4} , { 10, 8} , { 17, 8} , { 16, 19} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { -23, 112} , { -15, 71} , { -7, 61} , { 0, 53} , { -5, 66} },
+ { { -21, 101} , { -3, 39} , { -5, 53} , { -7, 61} , { -11, 75} },
+ { { -5, 71} , { 0, 24} , { -1, 36} , { -2, 42} , { -2, 52} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -11, 76} , { -10, 44} , { -10, 52} , { -10, 57} , { -9, 58} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { -23, 112} , { -15, 71} , { -7, 61} , { 0, 53} , { -5, 66} },
+ { { -21, 101} , { -3, 39} , { -5, 53} , { -7, 61} , { -11, 75} },
+ { { -5, 71} , { 0, 24} , { -1, 36} , { -2, 42} , { -2, 52} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -11, 76} , { -10, 44} , { -10, 52} , { -10, 57} , { -9, 58} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ },
+ //----- model 2 -----
+ {
+ { { -24, 115} , { -22, 82} , { -9, 62} , { 0, 53} , { 0, 59} },
+ { { -21, 100} , { -14, 57} , { -12, 67} , { -11, 71} , { -10, 77} },
+ { { -9, 71} , { -7, 37} , { -8, 44} , { -11, 49} , { -10, 56} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -10, 82} , { -8, 48} , { -8, 61} , { -8, 66} , { -7, 70} },
+ { { -4, 79} , { -22, 69} , { -16, 75} , { -2, 58} , { 1, 58} },
+ { { -13, 81} , { -6, 38} , { -13, 62} , { -6, 58} , { -2, 59} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { -24, 115} , { -22, 82} , { -9, 62} , { 0, 53} , { 0, 59} },
+ { { -21, 100} , { -14, 57} , { -12, 67} , { -11, 71} , { -10, 77} },
+ { { -9, 71} , { -7, 37} , { -8, 44} , { -11, 49} , { -10, 56} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -10, 82} , { -8, 48} , { -8, 61} , { -8, 66} , { -7, 70} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { -24, 115} , { -22, 82} , { -9, 62} , { 0, 53} , { 0, 59} },
+ { { -21, 100} , { -14, 57} , { -12, 67} , { -11, 71} , { -10, 77} },
+ { { -9, 71} , { -7, 37} , { -8, 44} , { -11, 49} , { -10, 56} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -10, 82} , { -8, 48} , { -8, 61} , { -8, 66} , { -7, 70} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_ABS_I[1][22][5][2] = // indexed [model][row][column][pair element]; this table holds a single model
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: rows 0-9, then rows 10-15 (Cb) and rows 16-21 (Cr), each repeating rows 0-4 plus one CTX_UNUSED row -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { 0, 58} , { 1, 63} , { -2, 72} , { -1, 74} , { -9, 91} },
+ { { -16, 64} , { -8, 68} , { -10, 78} , { -6, 77} , { -10, 86} },
+ { { -2, 55} , { 0, 61} , { 1, 64} , { 0, 68} , { -9, 92} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -12, 73} , { -8, 76} , { -7, 80} , { -9, 88} , { -17, 110} },
+ { { -13, 86} , { -13, 96} , { -11, 97} , { -19, 117} , CTX_UNUSED },
+ { { -13, 71} , { -10, 79} , { -12, 86} , { -13, 90} , { -14, 97} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { 0, 58} , { 1, 63} , { -2, 72} , { -1, 74} , { -9, 91} },
+ { { -16, 64} , { -8, 68} , { -10, 78} , { -6, 77} , { -10, 86} },
+ { { -2, 55} , { 0, 61} , { 1, 64} , { 0, 68} , { -9, 92} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -12, 73} , { -8, 76} , { -7, 80} , { -9, 88} , { -17, 110} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { 0, 58} , { 1, 63} , { -2, 72} , { -1, 74} , { -9, 91} },
+ { { -16, 64} , { -8, 68} , { -10, 78} , { -6, 77} , { -10, 86} },
+ { { -2, 55} , { 0, 61} , { 1, 64} , { 0, 68} , { -9, 92} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -12, 73} , { -8, 76} , { -7, 80} , { -9, 88} , { -17, 110} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+static const char INIT_ABS_P[3][22][5][2] = // indexed [model][row][column][pair element]; three models (0-2) with the same 22-row layout
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: rows 0-9, then rows 10-15 (Cb) and rows 16-21 (Cr), each repeating rows 0-4 plus one CTX_UNUSED row -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { -2, 59} , { -4, 70} , { -4, 75} , { -8, 82} , { -17, 102} },
+ { { -6, 59} , { -7, 71} , { -12, 83} , { -11, 87} , { -30, 119} },
+ { { -12, 56} , { -6, 60} , { -5, 62} , { -8, 66} , { -8, 76} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -6, 55} , { 0, 58} , { 0, 64} , { -3, 74} , { -10, 90} },
+ { { -2, 58} , { -3, 72} , { -3, 81} , { -11, 97} , CTX_UNUSED },
+ { { 2, 40} , { 0, 58} , { -3, 70} , { -6, 79} , { -8, 85} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { -2, 59} , { -4, 70} , { -4, 75} , { -8, 82} , { -17, 102} },
+ { { -6, 59} , { -7, 71} , { -12, 83} , { -11, 87} , { -30, 119} },
+ { { -12, 56} , { -6, 60} , { -5, 62} , { -8, 66} , { -8, 76} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -6, 55} , { 0, 58} , { 0, 64} , { -3, 74} , { -10, 90} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { -2, 59} , { -4, 70} , { -4, 75} , { -8, 82} , { -17, 102} },
+ { { -6, 59} , { -7, 71} , { -12, 83} , { -11, 87} , { -30, 119} },
+ { { -12, 56} , { -6, 60} , { -5, 62} , { -8, 66} , { -8, 76} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -6, 55} , { 0, 58} , { 0, 64} , { -3, 74} , { -10, 90} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ },
+ //----- model 1 -----
+ {
+ { { -11, 77} , { -9, 80} , { -9, 84} , { -10, 87} , { -34, 127} },
+ { { -15, 77} , { -17, 91} , { -25, 107} , { -25, 111} , { -28, 122} },
+ { { -9, 57} , { -6, 63} , { -4, 65} , { -4, 67} , { -7, 82} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -16, 72} , { -7, 69} , { -4, 69} , { -5, 74} , { -9, 86} },
+ { { -2, 55} , { -2, 67} , { 0, 73} , { -8, 89} , CTX_UNUSED },
+ { { 3, 37} , { -1, 61} , { -5, 73} , { -1, 70} , { -4, 78} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { -11, 77} , { -9, 80} , { -9, 84} , { -10, 87} , { -34, 127} },
+ { { -15, 77} , { -17, 91} , { -25, 107} , { -25, 111} , { -28, 122} },
+ { { -9, 57} , { -6, 63} , { -4, 65} , { -4, 67} , { -7, 82} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -16, 72} , { -7, 69} , { -4, 69} , { -5, 74} , { -9, 86} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { -11, 77} , { -9, 80} , { -9, 84} , { -10, 87} , { -34, 127} },
+ { { -15, 77} , { -17, 91} , { -25, 107} , { -25, 111} , { -28, 122} },
+ { { -9, 57} , { -6, 63} , { -4, 65} , { -4, 67} , { -7, 82} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -16, 72} , { -7, 69} , { -4, 69} , { -5, 74} , { -9, 86} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ },
+ //----- model 2 -----
+ {
+ { { -14, 85} , { -13, 89} , { -13, 94} , { -11, 92} , { -29, 127} },
+ { { -21, 85} , { -16, 88} , { -23, 104} , { -15, 98} , { -37, 127} },
+ { { -12, 59} , { -8, 63} , { -9, 67} , { -6, 68} , { -10, 79} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -14, 75} , { -10, 79} , { -9, 83} , { -12, 92} , { -18, 108} },
+ { { -13, 78} , { -9, 83} , { -4, 81} , { -13, 99} , CTX_UNUSED },
+ { { -16, 73} , { -10, 76} , { -13, 86} , { -9, 83} , { -10, 87} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cb in the 4:4:4 common mode
+ { { -14, 85} , { -13, 89} , { -13, 94} , { -11, 92} , { -29, 127} },
+ { { -21, 85} , { -16, 88} , { -23, 104} , { -15, 98} , { -37, 127} },
+ { { -12, 59} , { -8, 63} , { -9, 67} , { -6, 68} , { -10, 79} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -14, 75} , { -10, 79} , { -9, 83} , { -12, 92} , { -18, 108} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ //Cr in the 4:4:4 common mode
+ { { -14, 85} , { -13, 89} , { -13, 94} , { -11, 92} , { -29, 127} },
+ { { -21, 85} , { -16, 88} , { -23, 104} , { -15, 98} , { -37, 127} },
+ { { -12, 59} , { -8, 63} , { -9, 67} , { -6, 68} , { -10, 79} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -14, 75} , { -10, 79} , { -9, 83} , { -12, 92} , { -18, 108} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }
+ }
+};
+
+
+
+#if ENABLE_FIELD_CTX
+static const char INIT_FLD_MAP_I[1][8][15][2] = // indexed [model][row][column][pair element]; single model; compiled only when ENABLE_FIELD_CTX is non-zero
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: 8 active rows; the //-commented row after row 2 is not part of the initializer -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { -6, 93} , { -6, 84} , { -8, 79} , { 0, 66} , { -1, 71} , { 0, 62} , { -2, 60} , { -2, 59} , { -5, 75} , { -3, 62} , { -4, 58} , { -9, 66} , { -1, 79} , { 0, 71} , { 3, 68} },
+ { CTX_UNUSED , { 10, 44} , { -7, 62} , { 15, 36} , { 14, 40} , { 16, 27} , { 12, 29} , { 1, 44} , { 20, 36} , { 18, 32} , { 5, 42} , { 1, 48} , { 10, 62} , { 17, 46} , { 9, 64} },
+ { { -14, 106} , { -13, 97} , { -15, 90} , { -12, 90} , { -18, 88} , { -10, 73} , { -9, 79} , { -14, 86} , { -10, 73} , { -10, 70} , { -10, 69} , { -5, 66} , { -9, 64} , { -5, 58} , { 2, 59} },
+// { { -1, 73} , { -7, 73} , { -6, 76} , { -7, 71} , { -9, 72} , { -5, 65} , { -14, 83} , { -8, 72} , { -10, 75} , { -5, 64} , { -4, 59} , { -13, 79} , { -9, 69} , { -8, 66} , { 3, 55} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -12, 104} , { -11, 97} , { -16, 96} , { -7, 88} , { -8, 85} , { -7, 85} , { -9, 85} , { -13, 88} , { 4, 66} , { -3, 77} , { -3, 76} , { -6, 76} , { 10, 58} , { -1, 76} , { -1, 83} },
+ { { -7, 99} , { -14, 95} , { 2, 95} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 0, 76} , { -5, 74} , { 0, 70} , { -11, 75} , { 1, 68} , { 0, 65} , { -14, 73} , { 3, 62} , { 4, 62} , { -1, 68} , { -13, 75} , { 11, 55} , { 5, 64} , { 12, 70} }
+ }
+};
+
+static const char INIT_FLD_MAP_P[3][8][15][2] = // indexed [model][row][column][pair element]; three models (0-2); compiled only when ENABLE_FIELD_CTX is non-zero
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: 8 active rows per model; the //-commented row after row 2 of each model is not part of the initializer -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { -13, 106} , { -16, 106} , { -10, 87} , { -21, 114} , { -18, 110} , { -14, 98} , { -22, 110} , { -21, 106} , { -18, 103} , { -21, 107} , { -23, 108} , { -26, 112} , { -10, 96} , { -12, 95} , { -5, 91} },
+ { CTX_UNUSED , { -9, 93} , { -22, 94} , { -5, 86} , { 9, 67} , { -4, 80} , { -10, 85} , { -1, 70} , { 7, 60} , { 9, 58} , { 5, 61} , { 12, 50} , { 15, 50} , { 18, 49} , { 17, 54} },
+ { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} },
+// { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 10, 41} , { 7, 46} , { -1, 51} , { 7, 49} , { 8, 52} , { 9, 41} , { 6, 47} , { 2, 55} , { 13, 41} , { 10, 44} , { 6, 50} , { 5, 53} , { 13, 49} , { 4, 63} , { 6, 64} },
+ { { -2, 69} , { -2, 59} , { 6, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 10, 44} , { 9, 31} , { 12, 43} , { 3, 53} , { 14, 34} , { 10, 38} , { -3, 52} , { 13, 40} , { 17, 32} , { 7, 44} , { 7, 38} , { 13, 50} , { 10, 57} , { 26, 43} }
+ },
+ //----- model 1 -----
+ {
+ { { -21, 126} , { -23, 124} , { -20, 110} , { -26, 126} , { -25, 124} , { -17, 105} , { -27, 121} , { -27, 117} , { -17, 102} , { -26, 117} , { -27, 116} , { -33, 122} , { -10, 95} , { -14, 100} , { -8, 95} },
+ { CTX_UNUSED , { -17, 111} , { -28, 114} , { -6, 89} , { -2, 80} , { -4, 82} , { -9, 85} , { -8, 81} , { -1, 72} , { 5, 64} , { 1, 67} , { 9, 56} , { 0, 69} , { 1, 69} , { 7, 69} },
+ { { -3, 81} , { -3, 76} , { -7, 72} , { -6, 78} , { -12, 72} , { -14, 68} , { -3, 70} , { -6, 76} , { -5, 66} , { -5, 62} , { 0, 57} , { -4, 61} , { -9, 60} , { 1, 54} , { 2, 58} },
+// { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { -7, 69} , { -6, 67} , { -16, 77} , { -2, 64} , { 2, 61} , { -6, 67} , { -3, 64} , { 2, 57} , { -3, 65} , { -3, 66} , { 0, 62} , { 9, 51} , { -1, 66} , { -2, 71} , { -2, 75} },
+ { { -1, 70} , { -9, 72} , { 14, 60} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 16, 37} , { 0, 47} , { 18, 35} , { 11, 37} , { 12, 41} , { 10, 41} , { 2, 48} , { 12, 41} , { 13, 41} , { 0, 59} , { 3, 50} , { 19, 40} , { 3, 66} , { 18, 50} }
+ },
+ //----- model 2 -----
+ {
+ { { -22, 127} , { -25, 127} , { -25, 120} , { -27, 127} , { -19, 114} , { -23, 117} , { -25, 118} , { -26, 117} , { -24, 113} , { -28, 118} , { -31, 120} , { -37, 124} , { -10, 94} , { -15, 102} , { -10, 99} },
+ { CTX_UNUSED , { -13, 106} , { -50, 127} , { -5, 92} , { 17, 57} , { -5, 86} , { -13, 94} , { -12, 91} , { -2, 77} , { 0, 71} , { -1, 73} , { 4, 64} , { -7, 81} , { 5, 64} , { 15, 57} },
+ { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} },
+// { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 1, 67} , { 0, 68} , { -10, 67} , { 1, 68} , { 0, 77} , { 2, 64} , { 0, 68} , { -5, 78} , { 7, 55} , { 5, 59} , { 2, 65} , { 14, 54} , { 15, 44} , { 5, 60} , { 2, 70} },
+ { { -2, 76} , { -18, 86} , { 12, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 5, 64} , { -12, 70} , { 11, 55} , { 5, 56} , { 0, 69} , { 2, 65} , { -6, 74} , { 5, 54} , { 7, 54} , { -6, 76} , { -11, 82} , { -2, 77} , { -2, 77} , { 25, 42} }
+ }
+};
+
+static const char INIT_FLD_LAST_I[1][8][15][2] = // indexed [model][row][column][pair element]; single model; compiled only when ENABLE_FIELD_CTX is non-zero
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: 8 active rows; the //-commented row after row 2 is not part of the initializer -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { 15, 6} , { 6, 19} , { 7, 16} , { 12, 14} , { 18, 13} , { 13, 11} , { 13, 15} , { 15, 16} , { 12, 23} , { 13, 23} , { 15, 20} , { 14, 26} , { 14, 44} , { 17, 40} , { 17, 47} },
+ { CTX_UNUSED , { 24, 17} , { 21, 21} , { 25, 22} , { 31, 27} , { 22, 29} , { 19, 35} , { 14, 50} , { 10, 57} , { 7, 63} , { -2, 77} , { -4, 82} , { -3, 94} , { 9, 69} , { -12, 109} },
+ { { 21, -10} , { 24, -11} , { 28, -8} , { 28, -1} , { 29, 3} , { 29, 9} , { 35, 20} , { 29, 36} , { 14, 67} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+// { { 12, 33} , { 5, 38} , { 9, 34} , { 18, 22} , { 19, 22} , { 23, 19} , { 26, 16} , { 14, 44} , { 40, 14} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 36, -35} , { 36, -34} , { 32, -26} , { 37, -30} , { 44, -32} , { 34, -18} , { 34, -15} , { 40, -15} , { 33, -7} , { 35, -5} , { 33, 0} , { 38, 2} , { 33, 13} , { 23, 35} , { 13, 58} },
+ { { 29, -3} , { 26, 0} , { 22, 30} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 31, -7} , { 35, -15} , { 34, -3} , { 34, 3} , { 36, -1} , { 34, 5} , { 32, 11} , { 35, 5} , { 34, 12} , { 39, 11} , { 30, 29} , { 34, 26} , { 29, 39} , { 19, 66} }
+ }
+};
+
+static const char INIT_FLD_LAST_P[3][8][15][2] = // indexed [model][row][column][pair element]; three models (0-2); compiled only when ENABLE_FIELD_CTX is non-zero
+{ // NOTE(review): pairs hold negative values in plain char, whose signedness is implementation-defined -- confirm char is signed on every target
+ //----- model 0: 8 active rows per model; the //-commented row after row 2 of each model is not part of the initializer -----
+ { // every entry is a two-value pair or CTX_UNUSED (macro defined outside this excerpt); presumably CABAC context init values -- TODO confirm
+ { { 14, 11} , { 11, 14} , { 9, 11} , { 18, 11} , { 21, 9} , { 23, -2} , { 32, -15} , { 32, -15} , { 34, -21} , { 39, -23} , { 42, -33} , { 41, -31} , { 46, -28} , { 38, -12} , { 21, 29} },
+ { CTX_UNUSED , { 45, -24} , { 53, -45} , { 48, -26} , { 65, -43} , { 43, -19} , { 39, -10} , { 30, 9} , { 18, 26} , { 20, 27} , { 0, 57} , { -14, 82} , { -5, 75} , { -19, 97} , { -35, 125} },
+ { { 21, -13} , { 33, -14} , { 39, -7} , { 46, -2} , { 51, 2} , { 60, 6} , { 61, 17} , { 55, 34} , { 42, 62} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+// { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 27, 0} , { 28, 0} , { 31, -4} , { 27, 6} , { 34, 8} , { 30, 10} , { 24, 22} , { 33, 19} , { 22, 32} , { 26, 31} , { 21, 41} , { 26, 44} , { 23, 47} , { 16, 65} , { 14, 71} },
+ { { 8, 60} , { 6, 63} , { 17, 65} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 21, 24} , { 23, 20} , { 26, 23} , { 27, 32} , { 28, 23} , { 28, 24} , { 23, 40} , { 24, 32} , { 28, 29} , { 23, 42} , { 19, 57} , { 22, 53} , { 22, 61} , { 11, 86} }
+ },
+ //----- model 1 -----
+ {
+ { { 19, -6} , { 18, -6} , { 14, 0} , { 26, -12} , { 31, -16} , { 33, -25} , { 33, -22} , { 37, -28} , { 39, -30} , { 42, -30} , { 47, -42} , { 45, -36} , { 49, -34} , { 41, -17} , { 32, 9} },
+ { CTX_UNUSED , { 69, -71} , { 63, -63} , { 66, -64} , { 77, -74} , { 54, -39} , { 52, -35} , { 41, -10} , { 36, 0} , { 40, -1} , { 30, 14} , { 28, 26} , { 23, 37} , { 12, 55} , { 11, 65} },
+ { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+// { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 37, -33} , { 39, -36} , { 40, -37} , { 38, -30} , { 46, -33} , { 42, -30} , { 40, -24} , { 49, -29} , { 38, -12} , { 40, -10} , { 38, -3} , { 46, -5} , { 31, 20} , { 29, 30} , { 25, 44} },
+ { { 12, 48} , { 11, 49} , { 26, 45} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 22, 22} , { 23, 22} , { 27, 21} , { 33, 20} , { 26, 28} , { 30, 24} , { 27, 34} , { 18, 42} , { 25, 39} , { 18, 50} , { 12, 70} , { 21, 54} , { 14, 71} , { 11, 83} }
+ },
+ //----- model 2 -----
+ {
+ { { 17, -13} , { 16, -9} , { 17, -12} , { 27, -21} , { 37, -30} , { 41, -40} , { 42, -41} , { 48, -47} , { 39, -32} , { 46, -40} , { 52, -51} , { 46, -41} , { 52, -39} , { 43, -19} , { 32, 11} },
+ { CTX_UNUSED , { 61, -55} , { 56, -46} , { 62, -50} , { 81, -67} , { 45, -20} , { 35, -2} , { 28, 15} , { 34, 1} , { 39, 1} , { 30, 17} , { 20, 38} , { 18, 45} , { 15, 54} , { 0, 79} },
+ { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+// { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { { 36, -16} , { 37, -14} , { 37, -17} , { 32, 1} , { 34, 15} , { 29, 15} , { 24, 25} , { 34, 22} , { 31, 16} , { 35, 18} , { 31, 28} , { 33, 41} , { 36, 28} , { 27, 47} , { 21, 62} },
+ { { 18, 31} , { 19, 26} , { 36, 24} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED },
+ { CTX_UNUSED , { 24, 23} , { 27, 16} , { 24, 30} , { 31, 29} , { 22, 41} , { 22, 42} , { 16, 60} , { 15, 52} , { 14, 60} , { 3, 78} , { -16, 123} , { 21, 53} , { 22, 56} , { 25, 61} }
+ }
+};
+#endif
+
+
+#endif
+
diff --git a/Src/h264dec/lcommon/inc/enc_statistics.h b/Src/h264dec/lcommon/inc/enc_statistics.h
new file mode 100644
index 00000000..534a7d4c
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/enc_statistics.h
@@ -0,0 +1,60 @@
+/*!
+ **************************************************************************
+ * \file enc_statistics.h
+ *
+ * \brief
+ * statistics reports for the encoding process.
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Tourapis <alexismt@ieee.org>
+ * - Karsten Sühring <suehring@hhi.de>
+ *
+ **************************************************************************
+ */
+
+#ifndef _ENC_STATISTICS_H_
+#define _ENC_STATISTICS_H_
+#include "global.h"
+
+struct stat_parameters
+{
+ float bitr; //!< bit rate for current frame, used only for output to the terminal
+ float bitrate; //!< average bit rate for the sequence except first frame
+ int64 bit_ctr; //!< counter for bit usage
+ int64 bit_ctr_n; //!< bit usage for the current frame
+ int64 bit_ctr_emulationprevention; //!< stored bits needed to prevent start code emulation
+ int bit_slice; //!< number of bits in current slice
+ int stored_bit_slice; //!< keep number of bits in current slice (to restore status in case of MB re-encoding)
+ int b8_mode_0_use [NUM_SLICE_TYPES][2];
+ int64 mode_use_transform[NUM_SLICE_TYPES][MAXMODE][2];
+ int64 intra_chroma_mode[4];
+
+ // B pictures
+ int NumberBFrames;
+
+ int frame_counter;
+ int64 quant [NUM_SLICE_TYPES];
+ int64 num_macroblocks [NUM_SLICE_TYPES];
+ int frame_ctr [NUM_SLICE_TYPES];
+ int64 bit_counter [NUM_SLICE_TYPES];
+ float bitrate_st [NUM_SLICE_TYPES];
+ int64 mode_use [NUM_SLICE_TYPES][MAXMODE]; //!< Macroblock mode usage, indexed [slice type][mode]
+ int64 bit_use_mode [NUM_SLICE_TYPES][MAXMODE]; //!< statistics of bit usage
+ int64 bit_use_mb_type [NUM_SLICE_TYPES];
+ int64 bit_use_header [NUM_SLICE_TYPES];
+ int64 tmp_bit_use_cbp [NUM_SLICE_TYPES];
+ int64 bit_use_coeffC [NUM_SLICE_TYPES];
+ int64 bit_use_coeff [3][NUM_SLICE_TYPES];
+ int64 bit_use_delta_quant [NUM_SLICE_TYPES];
+ int64 bit_use_stuffingBits[NUM_SLICE_TYPES];
+
+ int bit_ctr_parametersets;
+ int bit_ctr_parametersets_n;
+ int64 bit_ctr_filler_data;
+ int64 bit_ctr_filler_data_n;
+
+};
+typedef struct stat_parameters StatParameters;
+
+#endif
diff --git a/Src/h264dec/lcommon/inc/frame.h b/Src/h264dec/lcommon/inc/frame.h
new file mode 100644
index 00000000..25507d51
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/frame.h
@@ -0,0 +1,50 @@
+
+/*!
+ ************************************************************************
+ * \file frame.h
+ *
+ * \brief
+ * headers for frame format related information
+ *
+ * \author
+ *
+ ************************************************************************
+ */
+#ifndef H264_FRAME_H_
+#define H264_FRAME_H_
+#pragma once
+
+typedef enum {
+ CM_UNKNOWN = -1,
+ CM_YUV = 0,
+ CM_RGB = 1,
+ CM_XYZ = 2
+} ColorModel;
+
+typedef enum {
+ CF_UNKNOWN = -1, //!< Unknown color format
+ YUV400 = 0, //!< Monochrome
+ YUV420 = 1, //!< 4:2:0
+ YUV422 = 2, //!< 4:2:2
+ YUV444 = 3 //!< 4:4:4
+} ColorFormat;
+
+typedef struct frame_format
+{
+ ColorFormat yuv_format; //!< YUV format (0=4:0:0, 1=4:2:0, 2=4:2:2, 3=4:4:4)
+ int width; //!< luma component frame width
+ int height; //!< luma component frame height
+ int height_cr; //!< chroma component frame height
+ int width_cr; //!< chroma component frame width
+ int width_crop; //!< width after cropping consideration
+ int height_crop; //!< height after cropping consideration
+ int mb_width; //!< presumably luma frame width in macroblock units -- TODO confirm
+ int mb_height; //!< presumably luma frame height in macroblock units -- TODO confirm
+ int size_cmp[3]; //!< component sizes
+ int size; //!< total image size
+ int bit_depth[3]; //!< component bit depth
+ int max_value[3]; //!< component max value
+ int max_value_sq[3]; //!< component max value squared
+} FrameFormat;
+
+#endif
diff --git a/Src/h264dec/lcommon/inc/ifunctions.h b/Src/h264dec/lcommon/inc/ifunctions.h
new file mode 100644
index 00000000..0d0e86d8
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/ifunctions.h
@@ -0,0 +1,251 @@
+
+/*!
+ ************************************************************************
+ * \file
+ * ifunctions.h
+ *
+ * \brief
+ * define some inline functions that are used within the encoder.
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Sühring <suehring@hhi.de>
+ * - Alexis Tourapis <alexismt@ieee.org>
+ *
+ ************************************************************************
+ */
+#ifndef _IFUNCTIONS_H_
+#define _IFUNCTIONS_H_
+
+#include <math.h> /* system headers must be included BEFORE the keyword macros below (C11 7.1.2p4) */
+#include <limits.h>
+# if !defined(WIN32) && (__STDC_VERSION__ < 199901L) /* pre-C99, non-Windows: strip the qualifiers from the helpers in this header */
+ #define static
+ #define inline
+#endif
+
+
+static inline short smin(short a, short b)
+{
+ return (short) (((a) < (b)) ? (a) : (b));
+}
+
+static inline short smax(short a, short b)
+{
+ return (short) (((a) > (b)) ? (a) : (b));
+}
+
+static inline int imin(int a, int b)
+{/*
+ int retu;
+ _asm
+ {
+ mov eax, a
+ mov edx, b
+ cmp edx, eax
+ cmovle eax, edx
+ mov retu, eax
+ }
+ return retu;*/
+ return ((a) < (b)) ? (a) : (b);
+}
+
+static inline int imax(int a, int b)
+{
+ return ((a) > (b)) ? (a) : (b);
+}
+
+static inline double dmin(double a, double b)
+{
+ return ((a) < (b)) ? (a) : (b);
+}
+
+static inline double dmax(double a, double b)
+{
+ return ((a) > (b)) ? (a) : (b);
+}
+
+static inline int64 i64min(int64 a, int64 b)
+{
+ return ((a) < (b)) ? (a) : (b);
+}
+
+static inline int64 i64max(int64 a, int64 b)
+{
+ return ((a) > (b)) ? (a) : (b);
+}
+
+
+static inline short sabs(short x)
+{
+ static const short SHORT_BITS = (sizeof(short) * CHAR_BIT) - 1;
+ short y = (short) (x >> SHORT_BITS);
+ return (short) ((x ^ y) - y);
+}
+
+static inline int iabs(int x)
+{
+ static const int INT_BITS = (sizeof(int) * CHAR_BIT) - 1;
+ int y = x >> INT_BITS;
+ return (x ^ y) - y;
+}
+
+static inline double dabs(double x)
+{
+ return ((x) < 0) ? -(x) : (x);
+}
+
+static inline int64 i64abs(int64 x)
+{
+ static const int64 INT64_BITS = (sizeof(int64) * CHAR_BIT) - 1;
+ int64 y = x >> INT64_BITS;
+ return (x ^ y) - y;
+}
+
+static inline double dabs2(double x)
+{
+ return (x) * (x);
+}
+
+static inline int iabs2(int x)
+{
+ return (x) * (x);
+}
+
+static inline int64 i64abs2(int64 x)
+{
+ return (x) * (x);
+}
+
+static inline int isign(int x)
+{
+ return ( (x > 0) - (x < 0));
+}
+
+static inline int isignab(int a, int b)
+{
+ return ((b) < 0) ? -iabs(a) : iabs(a);
+}
+
+static inline int rshift_rnd(int x, int a)
+{
+ return (a > 0) ? ((x + (1 << (a-1) )) >> a) : (x << (-a));
+}
+
+static inline int rshift_rnd_pos(int x, int a)
+{
+ return (x + (1 << (a-1) )) >> a;
+}
+
+// flip a before calling
+static inline int rshift_rnd_nonpos(int x, int a)
+{
+ return (x << a);
+}
+
+static inline int rshift_rnd_sign(int x, int a)
+{
+ return (x > 0) ? ( ( x + (1 << (a-1)) ) >> a ) : (-( ( iabs(x) + (1 << (a-1)) ) >> a ));
+}
+
+static inline unsigned int rshift_rnd_us(unsigned int x, unsigned int a)
+{
+ return (a > 0) ? ((x + (1 << (a-1))) >> a) : x;
+}
+
+static inline int rshift_rnd_sf(int x, int a)
+{
+ return ((x + (1 << (a-1) )) >> a);
+}
+
+static inline unsigned int rshift_rnd_us_sf(unsigned int x, unsigned int a)
+{
+ return ((x + (1 << (a-1))) >> a);
+}
+
+static inline int iClip1(int high, int x)
+{
+ if (x < 0)
+ return 0;
+ if (x > high)
+ return high;
+ return x;
+ /* old:
+ x = imax(x, 0);
+ x = imin(x, high);
+
+ return x;*/
+}
+
+static inline int iClip3(int low, int high, int x)
+{
+ if (x < low)
+ return low;
+ if (x > high)
+ return high;
+ return x;
+ /* old:
+ x = imax(x, low);
+ x = imin(x, high);
+
+ return x;*/
+}
+
+static inline short sClip3(short low, short high, short x)
+{
+ x = smax(x, low);
+ x = smin(x, high);
+
+ return x;
+}
+
+static inline double dClip3(double low, double high, double x)
+{
+ x = dmax(x, low);
+ x = dmin(x, high);
+
+ return x;
+}
+
+static inline int weighted_cost(int factor, int bits)
+{
+ return (((factor)*(bits))>>LAMBDA_ACCURACY_BITS);
+}
+
+static inline int RSD(int x)
+{
+ return ((x&2)?(x|1):(x&(~1)));
+}
+
+static inline int power2(int x)
+{
+ return 1 << (x);
+}
+
+static inline int float2int (float x)
+{
+ return (int)((x < 0) ? (x - 0.5f) : (x + 0.5f));
+}
+
+
+
+#if ZEROSNR
+static inline float psnr(int max_sample_sq, int samples, float sse_distortion )
+{
+ return (float) (10.0 * log10(max_sample_sq * (double) ((double) samples / (sse_distortion < 1.0 ? 1.0 : sse_distortion))));
+}
+#else
+static inline float psnr(int max_sample_sq, int samples, float sse_distortion )
+{
+ return (float) (sse_distortion == 0.0 ? 0.0 : (10.0 * log10(max_sample_sq * (double) ((double) samples / sse_distortion))));
+}
+#endif
+
+
+# if !defined(WIN32) && (__STDC_VERSION__ < 199901L)
+ #undef static
+ #undef inline
+#endif
+
+#endif
+
diff --git a/Src/h264dec/lcommon/inc/img_io.h b/Src/h264dec/lcommon/inc/img_io.h
new file mode 100644
index 00000000..7d57d03f
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/img_io.h
@@ -0,0 +1,28 @@
+/*!
+ *************************************************************************************
+ * \file img_io.h
+ *
+ * \brief
+ * image I/O related functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *************************************************************************************
+ */
+#include "global.h"
+
+#ifndef _IMG_IO_H_
+#define _IMG_IO_H_
+
+#include "io_video.h"
+
+extern int ParseSizeFromString (VideoDataFile *input_file, int *xlen, int *ylen, double *fps);
+extern void ParseFrameNoFormatFromString (VideoDataFile *input_file);
+extern void OpenFrameFile (VideoDataFile *input_file, int FrameNumberInFile);
+extern void OpenFiles (VideoDataFile *input_file);
+extern void CloseFiles (VideoDataFile *input_file);
+extern VideoFileType ParseVideoType (VideoDataFile *input_file);
+
+#endif
+
diff --git a/Src/h264dec/lcommon/inc/mb_access.h b/Src/h264dec/lcommon/inc/mb_access.h
new file mode 100644
index 00000000..0bad3bca
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/mb_access.h
@@ -0,0 +1,70 @@
+
+/*!
+ *************************************************************************************
+ * \file mb_access.h
+ *
+ * \brief
+ * Functions for macroblock neighborhoods
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Sühring <suehring@hhi.de>
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *************************************************************************************
+ */
+
+#ifndef _MB_ACCESS_H_
+#define _MB_ACCESS_H_
+
+extern void CheckAvailabilityOfNeighbors(Macroblock *currMB);
+
+/* MB Aff */
+extern void getAffNeighbour (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+extern void getAffNeighbourLuma (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getAffNeighbourXPLuma (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getAffNeighbourPPLumaNB (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getAffNeighbourNPLuma (const Macroblock *currMB, int yN, PixelPos *pix);
+extern void getAffNeighbourN0Luma (const Macroblock *currMB, PixelPos *pix);
+extern void getAffNeighbourNXLuma (const Macroblock *currMB, int xN, PixelPos *pix);
+extern void getAffNeighbour0X (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix);
+extern void getAffNeighbour0XLuma (const Macroblock *currMB, int yN, PixelPos *pix);
+extern void getAffNeighbour0N (const Macroblock *currMB, const int mb_size[2], PixelPos *pix);
+extern void getAffNeighbourX0 (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix);
+extern void getAffNeighbourNX (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix);
+extern void getAffNeighbourN0 (const Macroblock *currMB, const int mb_size[2], PixelPos *pix);
+extern void getAffNeighbour0NLuma (const Macroblock *currMB, PixelPos *pix);
+extern void getAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix);
+/* normal */
+extern void getNonAffNeighbour (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbourXP_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbourPX_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbourLuma (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getNonAffNeighbourXPLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getNonAffNeighbourPXLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getNonAffNeighbourN0Luma(const Macroblock *currMB, PixelPos *pix);
+extern void getNonAffNeighbourNXLuma(const Macroblock *currMB, int yN, PixelPos *pix);
+extern void getNonAffNeighbourN0 (const Macroblock *currMB, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbour0N (const Macroblock *currMB, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbourNX (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbourNP (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbourNPChromaNB(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbour0X (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbour0XLuma(const Macroblock *currMB, int yN, PixelPos *pix);
+extern void getNonAffNeighbourX0 (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix);
+extern void getNonAffNeighbour0NLuma(const Macroblock *currMB, PixelPos *pix);
+extern void getNonAffNeighbourNPLumaNB(const Macroblock *currMB, int yN, PixelPos *pix);
+extern void getNonAffNeighbourXPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getNonAffNeighbourPPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getNonAffNeighbourXPLumaNB_NoPos(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getNonAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+extern void getNonAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix);
+extern void get4x4Neighbour (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+extern void get4x4NeighbourLuma (const Macroblock *currMB, int block_x, int block_y, PixelPos *pix);
+extern Boolean mb_is_available (int mbAddr, const Macroblock *currMB);
+extern void get_mb_pos (VideoParameters *p_Vid, int mb_addr, const int mb_size[2], short *x, short *y);
+extern void get_mb_block_pos_normal (const h264_pic_position *PicPos, int mb_addr, short *x, short *y);
+extern void get_mb_block_pos_mbaff (const h264_pic_position *PicPos, int mb_addr, short *x, short *y);
+
+
+#endif
diff --git a/Src/h264dec/lcommon/inc/memalloc.h b/Src/h264dec/lcommon/inc/memalloc.h
new file mode 100644
index 00000000..fb4c3132
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/memalloc.h
@@ -0,0 +1,71 @@
+
+/*!
+ ************************************************************************
+ * \file memalloc.h
+ *
+ * \brief
+ * Memory allocation and free helper functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Sühring <suehring@hhi.de>
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ ************************************************************************
+ */
+
+#ifndef _MEMALLOC_H_
+#define _MEMALLOC_H_
+
+#include "global.h"
+#include "quant_params.h"
+
+#if defined(USEMMX) // && (IMGTYPE == 0) // MMX, SSE, SSE2 intrinsic support
+#if defined(_MSC_VER) || defined(__INTEL_COMPILER) // ICC
+# include <emmintrin.h>
+# else
+# include <xmmintrin.h>
+# endif
+#endif
+
+extern int get_mem2D(byte ***array2D, int dim0, int dim1);
+extern int get_mem3D(byte ****array3D, int dim0, int dim1, int dim2);
+extern int get_mem4D(byte *****array4D, int dim0, int dim1, int dim2, int dim3);
+
+extern int get_mem2Dint(int ***array2D, int rows, int columns);
+extern int get_mem3Dint(int ****array3D, int frames, int rows, int columns);
+extern int get_mem4Dint(int *****array4D, int idx, int frames, int rows, int columns );
+
+extern int get_mem2DPicMotion(struct pic_motion ***array3D, int rows, int columns);
+extern int get_mem3Dref(h264_ref_t ****array3D, int frames, int rows, int columns);
+
+extern int get_mem2Dshort(short ***array2D, int dim0, int dim1);
+extern MotionVector ***get_mem3DMotionVector(int dim0, int dim1, int dim2);
+extern int get_mem4Dshort(short *****array4D, int dim0, int dim1, int dim2, int dim3);
+extern int get_mem2Dpel(imgpel ***array2D, int rows, int columns);
+
+extern struct video_image *get_memImage(int width, int height);
+extern void free_memImage(struct video_image *image);
+
+extern void free_mem2D (byte **array2D);
+extern void free_mem3D (byte ***array3D);
+extern void free_mem4D (byte ****array4D);
+//
+extern void free_mem2Dint (int **array2D);
+extern void free_mem3Dint (int ***array3D);
+
+extern void free_mem3Dref(h264_ref_t ***array3D);
+extern void free_mem2DPicMotion(struct pic_motion **array3D);
+//
+extern void free_mem2Dshort(short **array2D);
+
+extern void free_mem3DMotionVector(MotionVector ***);
+
+extern void free_mem2Dpel (imgpel **array2D);
+extern int init_top_bot_planes(imgpel **imgFrame, int height, imgpel ***imgTopField, imgpel ***imgBotField);
+extern void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField);
+
+extern void no_mem_exit(char *where);
+
+
+#endif
diff --git a/Src/h264dec/lcommon/inc/memcache.h b/Src/h264dec/lcommon/inc/memcache.h
new file mode 100644
index 00000000..26131827
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/memcache.h
@@ -0,0 +1,24 @@
+#pragma once
+typedef struct image_cache
+{
+ int size_x, size_y;
+ struct video_image *head;
+} ImageCache;
+
+void image_cache_set_dimensions(ImageCache *cache, int width, int height);
+int image_cache_dimensions_match(ImageCache *cache, int width, int height);
+void image_cache_add(ImageCache *cache, struct video_image *image);
+struct video_image *image_cache_get(ImageCache *cache);
+void image_cache_flush(ImageCache *cache);
+
+typedef struct motion_cache
+{
+ int size_x, size_y;
+ struct pic_motion **head;
+} MotionCache;
+
+void motion_cache_set_dimensions(MotionCache *cache, int width, int height);
+int motion_cache_dimensions_match(MotionCache *cache, int width, int height);
+void motion_cache_add(MotionCache *cache, struct pic_motion **image);
+struct pic_motion **motion_cache_get(MotionCache *cache);
+void motion_cache_flush(MotionCache *cache); \ No newline at end of file
diff --git a/Src/h264dec/lcommon/inc/mv_prediction.h b/Src/h264dec/lcommon/inc/mv_prediction.h
new file mode 100644
index 00000000..0f2a13e2
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/mv_prediction.h
@@ -0,0 +1,19 @@
+/*!
+ *************************************************************************************
+ * \file mv_prediction.h
+ *
+ * \brief
+ * Declarations for Motion Vector Prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *************************************************************************************
+ */
+
+#ifndef _MV_PREDICTION_H_
+#define _MV_PREDICTION_H_
+
+extern void init_motion_vector_prediction(Macroblock *currMB, int MbaffFrameFlag);
+
+#endif
diff --git a/Src/h264dec/lcommon/inc/nalucommon.h b/Src/h264dec/lcommon/inc/nalucommon.h
new file mode 100644
index 00000000..f0288ac5
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/nalucommon.h
@@ -0,0 +1,64 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ * nalucommon.h
+ * \brief
+ * NALU handling common to encoder and decoder
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ * - Karsten Suehring <suehring@hhi.de>
+ ***************************************************************************************
+ */
+
+#ifndef _NALUCOMMON_H_
+#define _NALUCOMMON_H_
+
+#define MAXRBSPSIZE 64000
+#define MAXNALUSIZE 64000
+
+//! values for nal_unit_type
+typedef enum {
+ NALU_TYPE_SLICE = 1,
+ NALU_TYPE_DPA = 2,
+ NALU_TYPE_DPB = 3,
+ NALU_TYPE_DPC = 4,
+ NALU_TYPE_IDR = 5,
+ NALU_TYPE_SEI = 6,
+ NALU_TYPE_SPS = 7,
+ NALU_TYPE_PPS = 8,
+ NALU_TYPE_AUD = 9,
+ NALU_TYPE_EOSEQ = 10,
+ NALU_TYPE_EOSTREAM = 11,
+ NALU_TYPE_FILL = 12
+} NaluType;
+
+//! values for nal_ref_idc
+typedef enum {
+ NALU_PRIORITY_HIGHEST = 3,
+ NALU_PRIORITY_HIGH = 2,
+ NALU_PRIORITY_LOW = 1,
+ NALU_PRIORITY_DISPOSABLE = 0
+} NalRefIdc;
+
+//! NAL unit structure
+typedef struct nalu_t
+{
+ int startcodeprefix_len; //!< 4 for parameter sets and first slice in picture, 3 for everything else (suggested)
+ unsigned len; //!< Length of the NAL unit (Excluding the start code, which does not belong to the NALU)
+ unsigned max_size; //!< NAL Unit Buffer size
+ int forbidden_bit; //!< should be always FALSE
+ NaluType nal_unit_type; //!< NALU_TYPE_xxxx
+ NalRefIdc nal_reference_idc; //!< NALU_PRIORITY_xxxx
+ byte *buf; //!< contains the first byte followed by the EBSP
+ uint16 lost_packets; //!< true, if packet loss is detected
+} NALU_t;
+
+//! allocate one NAL Unit
+extern NALU_t *AllocNALU(int);
+
+//! free one NAL Unit
+extern void FreeNALU(NALU_t *n);
+
+#endif
diff --git a/Src/h264dec/lcommon/inc/quant_params.h b/Src/h264dec/lcommon/inc/quant_params.h
new file mode 100644
index 00000000..c35682c0
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/quant_params.h
@@ -0,0 +1,55 @@
+/*!
+ ***************************************************************************
+ * \file
+ * quant_params.h
+ *
+ * \author
+ * Alexis Michael Tourapis
+ *
+ * \brief
+ * Headerfile for Quantization parameters
+ **************************************************************************
+ */
+
+#ifndef _QUANT_PARAMS_H_
+#define _QUANT_PARAMS_H_
+
+struct level_quant_params {
+ int OffsetComp;
+ int ScaleComp;
+ int InvScaleComp;
+};
+
+typedef struct level_quant_params LevelQuantParams;
+
+struct quant_params {
+ int AdaptRndWeight;
+ int AdaptRndCrWeight;
+
+ LevelQuantParams *****q_params_4x4;
+ LevelQuantParams *****q_params_8x8;
+
+ int *qp_per_matrix;
+ int *qp_rem_matrix;
+
+ short **OffsetList4x4input;
+ short **OffsetList8x8input;
+ short ***OffsetList4x4;
+ short ***OffsetList8x8;
+};
+
+struct quant_methods {
+ int block_y;
+ int block_x;
+ int qp;
+ int* ACLevel;
+ int* ACRun;
+ int **fadjust;
+ LevelQuantParams **q_params;
+ int *coeff_cost;
+ const byte (*pos_scan)[2];
+ const byte *c_cost;
+};
+
+#endif
+
diff --git a/Src/h264dec/lcommon/inc/transform.h b/Src/h264dec/lcommon/inc/transform.h
new file mode 100644
index 00000000..61942004
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/transform.h
@@ -0,0 +1,27 @@
+
+/*!
+ ***************************************************************************
+ *
+ * \file transform.h
+ *
+ * \brief
+ * prototypes of transform functions
+ *
+ * \date
+ * 10 July 2007
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * Alexis Michael Tourapis
+ **************************************************************************/
+
+#ifndef _TRANSFORM_H_
+#define _TRANSFORM_H_
+
+#include "global.h"
+
+extern void forward4x4 (int **block , int **tblock, int pos_y, int pos_x);
+extern void ihadamard4x4 (int block[4][4]);
+extern void ihadamard2x2 (int block[4], int tblock[4]);
+
+#endif //_TRANSFORM_H_
diff --git a/Src/h264dec/lcommon/inc/typedefs.h b/Src/h264dec/lcommon/inc/typedefs.h
new file mode 100644
index 00000000..58806bef
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/typedefs.h
@@ -0,0 +1,71 @@
+/*!
+ *************************************************************************************
+ * \file typedefs.h
+ *
+ * \brief
+ * Common type definitions
+ * Currently only supports Windows and Linux operating systems.
+ * Need to add support for other "older" systems such as VAX, DECC, Unix Alpha, etc.
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *************************************************************************************
+ */
+
+#ifndef _TYPEDEFS_H_
+#define _TYPEDEFS_H_
+
+#include "win32.h"
+
+typedef unsigned char byte; //!< byte type definition
+typedef unsigned char uint8; //!< type definition for unsigned char (same as byte, 8 bits)
+typedef unsigned short uint16; //!< type definition for unsigned short (16 bits)
+typedef unsigned int uint32; //!< type definition for unsigned int (32 bits)
+
+typedef char int8; //!< NOTE(review): plain char -- its signedness is implementation-defined, so int8 may be unsigned on some targets
+typedef short int16; //!< type definition for short
+typedef int int32; //!< type definition for int
+
+#if (IMGTYPE == 0)
+typedef byte imgpel;
+typedef uint16 distpel;
+typedef int32 distblk;
+#elif (IMGTYPE == 2)
+typedef float imgpel;
+typedef float distpel;
+typedef float distblk;
+#else
+typedef uint16 imgpel;
+typedef uint32 distpel;
+typedef int64 distblk;
+#endif
+
+//! Boolean Type
+#ifdef FALSE
+# define Boolean int
+#else
+typedef enum {
+ FALSE,
+ TRUE
+} Boolean;
+#endif
+
+/*
+#define MAXUINT8 0xff
+#define MAXUINT16 0xffff
+#define MAXUINT32 0xffffffff
+#define MAXUINT64 0xffffffffffffffff
+
+#define MAXINT8 0x7f
+#define MININT8 (-MAXINT8)
+#define MAXINT16 0x7fff
+#define MININT16 (-MAXINT16)
+#define MAXINT32 0x7fffffff
+#define MININT32 (-MAXINT32)
+#define MAXINT64 0x7fffffffffffffff
+#define MININT64 (-MAXINT64)
+*/
+
+#endif
+
diff --git a/Src/h264dec/lcommon/inc/types.h b/Src/h264dec/lcommon/inc/types.h
new file mode 100644
index 00000000..6088f8e4
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/types.h
@@ -0,0 +1,204 @@
+/*!
+ ************************************************************************
+ * \file
+ * types.h
+ *
+ * \brief
+ * type definitions.
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ *
+ ************************************************************************
+ */
+#ifndef _TYPES_H_
+#define _TYPES_H_
+
+/***********************************************************************
+ * T y p e d e f i n i t i o n s f o r T M L
+ ***********************************************************************
+ */
+
+typedef enum
+{
+ // YUV
+ PLANE_Y = 0, // PLANE_Y
+ PLANE_U = 1, // PLANE_Cb
+ PLANE_V = 2, // PLANE_Cr
+ // RGB
+ PLANE_G = 0,
+ PLANE_B = 1,
+ PLANE_R = 2
+} ColorPlane;
+
+enum {
+ LIST_0 = 0,
+ LIST_1 = 1,
+ BI_PRED = 2,
+ BI_PRED_L0 = 3,
+ BI_PRED_L1 = 4
+};
+
+enum {
+ ERROR_SAD = 0,
+ ERROR_SSE = 1,
+ ERROR_SATD = 2,
+ ERROR_PSATD = 3
+};
+
+enum {
+ ME_Y_ONLY = 0,
+ ME_YUV_FP = 1,
+ ME_YUV_FP_SP = 2
+};
+
+
+enum {
+ DISTORTION_MSE = 0
+};
+
+
+//! Data Partitioning Modes
+typedef enum
+{
+ PAR_DP_1, //!< no data partitioning is supported
+ PAR_DP_3 //!< data partitioning with 3 partitions
+} PAR_DP_TYPE;
+
+
+//! Output File Types
+typedef enum
+{
+ PAR_OF_ANNEXB, //!< Annex B byte stream format
+ PAR_OF_RTP, //!< RTP packets in outfile
+ PAR_OF_MEMORY //!< presumably output kept in memory instead of a file -- TODO confirm (no trailing comma: not valid in C89)
+} PAR_OF_TYPE;
+
+//! Field Coding Types
+typedef enum
+{
+ FRAME_CODING,
+ FIELD_CODING,
+ ADAPTIVE_CODING,
+ FRAME_MB_PAIR_CODING
+} CodingType;
+
+//! definition of H.264 syntax elements
+typedef enum
+{
+ SE_HEADER,
+ SE_PTYPE,
+ SE_MBTYPE,
+ SE_REFFRAME,
+ SE_INTRAPREDMODE,
+ SE_MVD,
+ SE_CBP,
+ SE_LUM_DC_INTRA,
+ SE_CHR_DC_INTRA,
+ SE_LUM_AC_INTRA,
+ SE_CHR_AC_INTRA,
+ SE_LUM_DC_INTER,
+ SE_CHR_DC_INTER,
+ SE_LUM_AC_INTER,
+ SE_CHR_AC_INTER,
+ SE_DELTA_QUANT,
+ SE_BFRAME,
+ SE_EOS,
+ SE_MAX_ELEMENTS = 20 //!< number of maximum syntax elements
+} SE_type; // substituting the definitions in elements.h
+
+
+typedef enum
+{
+ NO_SLICES,
+ FIXED_MB,
+ FIXED_RATE,
+ CALL_BACK
+} SliceMode;
+
+
+typedef enum
+{
+ CAVLC,
+ CABAC
+} SymbolMode;
+
+typedef enum
+{
+ FULL_SEARCH = -1,
+ FAST_FULL_SEARCH = 0,
+ UM_HEX = 1,
+ UM_HEX_SIMPLE = 2,
+ EPZS = 3
+} SearchType;
+
+
+typedef enum
+{
+ FRAME,
+ TOP_FIELD,
+ BOTTOM_FIELD
+} PictureStructure; //!< New enum for field processing
+
+typedef enum
+{
+ P_SLICE = 0,
+ B_SLICE = 1,
+ I_SLICE = 2,
+ SP_SLICE = 3,
+ SI_SLICE = 4,
+ NUM_SLICE_TYPES = 5
+} SliceType;
+
+//Motion Estimation levels
+typedef enum
+{
+ F_PEL, //!< Full Pel refinement
+ H_PEL, //!< Half Pel refinement
+ Q_PEL //!< Quarter Pel refinement
+} MELevel;
+
+typedef enum
+{
+ FAST_ACCESS = 0, //!< Fast/safe reference access
+ UMV_ACCESS = 1 //!< unconstrained reference access
+} REF_ACCESS_TYPE;
+
+typedef enum
+{
+ IS_LUMA = 0,
+ IS_CHROMA = 1
+} Component_Type;
+
+typedef enum
+{
+ RC_MODE_0 = 0,
+ RC_MODE_1 = 1,
+ RC_MODE_2 = 2,
+ RC_MODE_3 = 3
+} RCModeType;
+
+
+typedef enum {
+ SSE = 0,
+ SSE_RGB = 1,
+ PSNR = 2,
+ PSNR_RGB = 3,
+ SSIM = 4,
+ SSIM_RGB = 5,
+ MS_SSIM = 6,
+ MS_SSIM_RGB = 7,
+ TOTAL_DIST_TYPES = 8
+} distortion_types;
+
+typedef enum {
+ WP_MCPREC_PLUS0 = 4,
+ WP_MCPREC_PLUS1 = 5,
+ WP_MCPREC_MINUS0 = 6,
+ WP_MCPREC_MINUS1 = 7,
+ WP_MCPREC_MINUS_PLUS0 = 8,
+ WP_REGULAR = 9
+} weighted_prediction_types;
+
+
+#endif
diff --git a/Src/h264dec/lcommon/inc/win32.h b/Src/h264dec/lcommon/inc/win32.h
new file mode 100644
index 00000000..09ffef61
--- /dev/null
+++ b/Src/h264dec/lcommon/inc/win32.h
@@ -0,0 +1,92 @@
+
+/*!
+ ************************************************************************
+ * \file
+ * win32.h
+ *
+ * \brief
+ * win32 definitions for H.264 encoder.
+ *
+ * \author
+ *
+ ************************************************************************
+ */
+#ifndef _H264_WIN32_H_
+#define _H264_WIN32_H_
+#pragma once
+
+# include <fcntl.h>
+# include <stdlib.h>
+# include <stdio.h>
+# include <string.h>
+# include <assert.h>
+
+#if defined(WIN32)
+# include <io.h>
+# include <sys/types.h>
+# include <sys/stat.h>
+# include <windows.h>
+#ifndef strcasecmp
+# define strcasecmp _strcmpi
+#endif
+
+# define snprintf _snprintf
+# define open _open
+# define close _close
+# define read _read
+# define write _write
+#ifndef lseek
+# define lseek _lseeki64
+#endif
+# define fsync _commit
+# define tell _telli64
+# define TIMEB _timeb
+# define TIME_T LARGE_INTEGER
+# define OPENFLAGS_WRITE (_O_WRONLY|_O_CREAT|_O_BINARY|_O_TRUNC) /* parenthesized so each flag macro expands as a single operand */
+# define OPEN_PERMISSIONS (_S_IREAD | _S_IWRITE)
+# define OPENFLAGS_READ (_O_RDONLY|_O_BINARY)
+# define inline _inline
+# define forceinline __forceinline
+#else
+# include <unistd.h>
+# include <sys/time.h>
+# include <sys/stat.h>
+# include <time.h>
+
+# define TIMEB timeb
+# define TIME_T struct timeval
+# define tell(fd) lseek((fd), 0, SEEK_CUR) /* current file offset; argument parenthesized against operator-precedence surprises */
+# define OPENFLAGS_WRITE (O_WRONLY|O_CREAT|O_TRUNC) /* parenthesized so each flag macro expands as a single operand */
+# define OPENFLAGS_READ (O_RDONLY)
+# define OPEN_PERMISSIONS (S_IRUSR | S_IWUSR)
+
+# if __STDC_VERSION__ >= 199901L
+ /* "inline" is a keyword */
+# else
+# define inline /* nothing */
+# endif
+# define forceinline inline
+#endif
+
+#if defined(WIN32) && !defined(__GNUC__)
+typedef __int64 int64;
+typedef unsigned __int64 uint64;
+# define FORMAT_OFF_T "I64d"
+# ifndef INT64_MIN
+# define INT64_MIN (-9223372036854775807i64 - 1i64)
+# endif
+#else
+
+typedef long long int64;
+typedef unsigned long long uint64;
+# define FORMAT_OFF_T "lld"
+# ifndef INT64_MIN
+# define INT64_MIN (-9223372036854775807LL - 1LL)
+# endif
+#endif
+
+void gettime(TIME_T* time);
+int64 timediff(TIME_T* start, TIME_T* end);
+int64 timenorm(int64 cur_time);
+
+#endif
diff --git a/Src/h264dec/lcommon/src/img_io.c b/Src/h264dec/lcommon/src/img_io.c
new file mode 100644
index 00000000..c0520218
--- /dev/null
+++ b/Src/h264dec/lcommon/src/img_io.c
@@ -0,0 +1,327 @@
+
+/*!
+ *************************************************************************************
+ * \file img_io.c
+ *
+ * \brief
+ * image I/O related functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *************************************************************************************
+ */
+#include "contributors.h"
+#include "global.h"
+#include "img_io.h"
+#include "report.h"
+
+/* Table of named picture formats: { name, width, height }.
+ * The entry with a NULL name terminates the list; ParseSizeFromString()
+ * walks it until that sentinel. */
+static const VIDEO_SIZE VideoRes[] = {
+ { "qcif" , 176, 144},
+ { "qqvga" , 160, 128},
+ { "qvga" , 320, 240},
+ { "sif" , 352, 240},
+ { "cif" , 352, 288},
+ { "vga" , 640, 480},
+ { "sd1" , 720, 480},
+ { "sd2" , 704, 576},
+ { "sd3" , 720, 576},
+ { "720p" , 1280, 720},
+ { "1080p" , 1920, 1080},
+ { NULL, 0, 0}
+};
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Parse the picture size (and optionally the frame rate) from the
+ *    file name stored in input_file->fname.
+ *
+ *    The name is scanned for "_<width>x<height>" terminated by '_' or
+ *    '.'. When the height is terminated by a further field that ends in
+ *    'i' or 'p' (e.g. "_30p"), that field is parsed as the frame rate.
+ *    If no explicit size is present, the whole name is compared
+ *    (case-insensitively) against the names in VideoRes[].
+ *
+ * \param input_file
+ *    file descriptor structure; fname is patched temporarily while
+ *    parsing and restored before returning
+ * \param x_size
+ *    receives the width, or -1 when no size was found
+ * \param y_size
+ *    receives the height, or -1 when no size was found
+ * \param fps
+ *    receives the frame rate; left untouched when none was parsed
+ *
+ * \return
+ *    1 if a size was determined, 0 otherwise
+ ************************************************************************
+ */
+int ParseSizeFromString (VideoDataFile *input_file, int *x_size, int *y_size, double *fps)
+{
+  char *p1, *p2, *tail;
+  char *fn = input_file->fname;
+  char c;
+  int i = 0;
+  long width, height;
+  double rate;
+
+  *x_size = *y_size = -1;
+  p1 = p2 = fn;
+  while (p1 != NULL && p2 != NULL)
+  {
+    // Search for the next '_'
+    p1 = strstr( p1, "_");
+    if (p1 == NULL)
+      break;
+
+    // Search for the end of the width field (first 'x' after the '_')
+    p2 = strstr( p1, "x");
+
+    // If no 'x' is found, exit
+    if (p2 == NULL)
+      break;
+
+    // Try conversion of the width; the 'x' is blanked so strtol sees one field
+    *p2 = 0;
+    width = strtol( p1 + 1, &tail, 10);
+
+    // Trailing characters or an empty field: discard and rescan from there
+    if (*tail != '\0' || *(p1 + 1) == '\0')
+    {
+      *p2 = 'x';
+      p1 = tail;
+      continue;
+    }
+
+    // Conversion was correct. Restore string
+    *p2 = 'x';
+
+    // Search for the end of the height field (first '_' or '.' after the 'x')
+    p1 = strpbrk( p2 + 1, "_.");
+    // If no '_' or '.' is found, try again from the current position
+    if (p1 == NULL)
+    {
+      p1 = p2 + 1;
+      continue;
+    }
+
+    // Try conversion of the height
+    c = *p1;
+    *p1 = 0;
+    height = strtol( p2 + 1, &tail, 10);
+
+    // Trailing characters or an empty field: discard and rescan from there
+    if (*tail != '\0' || *(p2 + 1) == '\0')
+    {
+      *p1 = c;
+      p1 = tail;
+      continue;
+    }
+
+    // Conversion was correct. Restore string
+    *p1 = c;
+
+    // Both dimensions converted cleanly: only now publish them, so a
+    // half-parsed candidate never leaks into the output parameters.
+    *x_size = (int) width;
+    *y_size = (int) height;
+
+    // Search for the end of the frame rate field (first 'i' or 'p')
+    p2 = strpbrk( p1 + 1, "ip");
+
+    // If no 'i' or 'p' is found, exit
+    if (p2 == NULL)
+      break;
+
+    // Try conversion of the frame rate
+    c = *p2;
+    *p2 = 0;
+    rate = strtod( p1 + 1, &tail);
+
+    // Trailing characters or an empty field: discard and rescan from there
+    if (*tail != '\0' || *(p1 + 1) == '\0')
+    {
+      *p2 = c;
+      p1 = tail;
+      continue;
+    }
+
+    // Conversion was correct. Restore string
+    *p2 = c;
+    *fps = rate;
+    break;
+  }
+
+  // No explicit size in the name: try the well-known format names.
+  // NOTE(review): this is a whole-string comparison, so a name such as
+  // "foreman_qcif.yuv" does not match - confirm whether a substring
+  // search was intended.
+  if (*x_size == -1 || *y_size == -1)
+  {
+    for (i = 0; VideoRes[i].name != NULL; i++)
+    {
+      if (strcasecmp (fn, VideoRes[i].name) == 0)
+      {
+        *x_size = VideoRes[i].x_size;
+        *y_size = VideoRes[i].y_size;
+        // Should add frame rate support as well
+        break;
+      }
+    }
+  }
+
+  return (*x_size == -1 || *y_size == -1) ? 0 : 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Look for a printf-style frame number field ("%<digits>d") in
+ *    input_file->fname and split the name around it.
+ *
+ *    On success fhead receives the text before the '%', ftail the text
+ *    after the 'd', num_digits the field width and zero_pad whether the
+ *    width started with '0'. A bare "%d" (no width) is not accepted.
+ *    When no valid field exists num_digits stays -1 and zero_pad 0.
+ *
+ *    is_concatenated is set to 0 for VIDEO_TIFF input, otherwise to 1
+ *    exactly when no frame number field was found.
+ *
+ * \param input_file
+ *    file descriptor structure; fname is patched temporarily while
+ *    parsing and restored before returning. fhead and ftail are assumed
+ *    to be at least as large as fname - TODO confirm in the struct
+ *    declaration.
+ ************************************************************************
+ */
+void ParseFrameNoFormatFromString (VideoDataFile *input_file)
+{
+  char *p1, *p2, *tail;
+  char *fn = input_file->fname;
+  char *fhead = input_file->fhead;
+  char *ftail = input_file->ftail;
+  int *zero_pad = &input_file->zero_pad;
+  int *num_digits = &input_file->num_digits;
+  int pad;
+  long digits;
+  size_t len;
+
+  *zero_pad = 0;
+  *num_digits = -1;
+  p1 = p2 = fn;
+  while (p1 != NULL && p2 != NULL)
+  {
+    // Search for the next '%'
+    p1 = strstr( p1, "%");
+    if (p1 == NULL)
+      break;
+
+    // Keep everything in front of the '%' as the head. strncpy does not
+    // terminate when the source is longer than the count, so do it here.
+    len = (size_t) (p1 - fn);
+    strncpy(fhead, fn, len);
+    fhead[len] = '\0';
+
+    // Search for the conversion character (first 'd' after the '%')
+    p2 = strstr( p1, "d");
+
+    // If no 'd' is found, exit
+    if (p2 == NULL)
+      break;
+
+    // Try conversion of the field width; the 'd' is blanked so strtol
+    // sees only the characters between '%' and 'd'
+    *p2 = 0;
+    pad = (*(p1 + 1) == '0') ? 1 : 0;
+    digits = strtol( p1 + 1, &tail, 10);
+
+    // Trailing characters or an empty width: discard and rescan from there
+    if (*tail != '\0' || *(p1 + 1) == '\0')
+    {
+      *p2 = 'd';
+      p1 = tail;
+      continue;
+    }
+
+    // Conversion was correct. Restore string
+    *p2 = 'd';
+
+    // Publish the result only for an accepted field, so a rejected
+    // candidate cannot leave num_digits != -1 behind.
+    *zero_pad = pad;
+    *num_digits = (int) digits;
+
+    // tail points at the restored 'd'; the file name tail starts after it.
+    tail++;
+    len = strlen(tail);
+    memcpy(ftail, tail, len + 1);   // + 1 copies the terminating NUL too
+    break;
+  }
+
+  if (input_file->vdtype == VIDEO_TIFF)
+  {
+    input_file->is_concatenated = 0;
+  }
+  else
+    input_file->is_concatenated = (*num_digits == -1) ? 1 : 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Open the file containing a single frame.
+ *
+ *    The name is assembled as <fhead><frame number><ftail>, where the
+ *    number is printed with a field width of num_digits and zero padded
+ *    when zero_pad is set. The descriptor is stored in
+ *    input_file->f_num (-1 on failure).
+ *
+ * \param input_file
+ *    file descriptor structure providing fhead, ftail, zero_pad and
+ *    num_digits
+ * \param FrameNumberInFile
+ *    frame number to substitute into the name
+ ************************************************************************
+ */
+void OpenFrameFile( VideoDataFile *input_file, int FrameNumberInFile)
+{
+  char infile [FILE_NAME_SIZE];
+  int written;
+
+  // Build the whole name in one bounded call instead of chaining
+  // strncpy/strncat with hand-maintained lengths.
+  if (input_file->zero_pad)
+    written = snprintf(infile, sizeof(infile), "%s%0*d%s", input_file->fhead,
+                       input_file->num_digits, FrameNumberInFile, input_file->ftail);
+  else
+    written = snprintf(infile, sizeof(infile), "%s%*d%s", input_file->fhead,
+                       input_file->num_digits, FrameNumberInFile, input_file->ftail);
+
+  // snprintf may be mapped to a variant that leaves the buffer
+  // unterminated on truncation, so terminate unconditionally.
+  infile[sizeof(infile) - 1] = '\0';
+
+  // Negative: error/truncation in some runtimes; >= size: C99 truncation.
+  if (written < 0 || written >= (int) sizeof(infile))
+  {
+    printf ("OpenFrameFile: file name too long (limit %d characters): %s\n",
+            (int) sizeof(infile) - 1, infile);
+    input_file->f_num = -1;
+    report_stats_on_error();
+    return;
+  }
+
+  if ((input_file->f_num = open(infile, OPENFLAGS_READ)) == -1)
+  {
+    printf ("OpenFrameFile: cannot open file %s\n", infile);
+    report_stats_on_error();
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Open the input when the whole sequence lives in one file.
+ *
+ *    Nothing happens unless input_file->is_concatenated is 1. An empty
+ *    name or a failing open() is reported through error() with code
+ *    500; the descriptor returned by open() is stored in
+ *    input_file->f_num.
+ ************************************************************************
+ */
+void OpenFiles( VideoDataFile *input_file)
+{
+  int fd;
+
+  // Per-frame files are opened on demand elsewhere
+  if (input_file->is_concatenated != 1)
+    return;
+
+  if (input_file->fname[0] == '\0')
+  {
+    snprintf(errortext, ET_SIZE, "No input sequence name was provided. Please check settings.");
+    error (errortext, 500);
+  }
+
+  fd = open(input_file->fname, OPENFLAGS_READ);
+  input_file->f_num = fd;
+  if (fd == -1)
+  {
+    snprintf(errortext, ET_SIZE, "Input file %s does not exist",input_file->fname);
+    error (errortext, 500);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Close the input descriptor, if one is open, and mark it as closed
+ *    by storing -1 in input_file->f_num.
+ ************************************************************************
+ */
+void CloseFiles(VideoDataFile *input_file)
+{
+  int fd = input_file->f_num;
+
+  input_file->f_num = -1;
+  if (fd != -1)
+    close(fd);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Derive the container type from the last three characters of
+ *    input_file->fname (compared case-insensitively).
+ *
+ *    "yuv" -> VIDEO_YUV / YUV420, "rgb" -> VIDEO_RGB / YUV444,
+ *    "tif" -> VIDEO_TIFF, "avi" -> VIDEO_AVI. Anything else, including
+ *    names shorter than three characters, is treated as VIDEO_YUV /
+ *    YUV420. The avi member is cleared for every type except VIDEO_AVI.
+ *
+ * \return
+ *    the type that was stored in input_file->vdtype
+ ************************************************************************
+ */
+VideoFileType ParseVideoType (VideoDataFile *input_file)
+{
+  size_t name_len = strlen(input_file->fname);
+  // Names shorter than three characters have no extension to inspect;
+  // forming fname + len - 3 for them would point before the buffer.
+  const char *format = (name_len >= 3) ? input_file->fname + name_len - 3 : "";
+
+  if (strcasecmp (format, "yuv") == 0)
+  {
+    input_file->vdtype = VIDEO_YUV;
+    input_file->format.yuv_format = YUV420;
+    input_file->avi = NULL;
+  }
+  else if (strcasecmp (format, "rgb") == 0)
+  {
+    input_file->vdtype = VIDEO_RGB;
+    input_file->format.yuv_format = YUV444;
+    input_file->avi = NULL;
+  }
+  else if (strcasecmp (format, "tif") == 0)
+  {
+    input_file->vdtype = VIDEO_TIFF;
+    input_file->avi = NULL;
+  }
+  else if (strcasecmp (format, "avi") == 0)
+  {
+    input_file->vdtype = VIDEO_AVI;
+  }
+  else
+  {
+    // Unknown extensions deliberately fall back to raw YUV 4:2:0
+    // instead of raising an error.
+    input_file->vdtype = VIDEO_YUV;
+    input_file->format.yuv_format = YUV420;
+    input_file->avi = NULL;
+  }
+
+  return input_file->vdtype;
+}
diff --git a/Src/h264dec/lcommon/src/memalloc.c b/Src/h264dec/lcommon/src/memalloc.c
new file mode 100644
index 00000000..da5872ed
--- /dev/null
+++ b/Src/h264dec/lcommon/src/memalloc.c
@@ -0,0 +1,1280 @@
+
+/*!
+ ************************************************************************
+ * \file memalloc.c
+ *
+ * \brief
+ * Memory allocation and free helper functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ * - Karsten Sühring <suehring@hhi.de>
+ *
+ ************************************************************************
+ */
+
+#include "global.h"
+#include "memalloc.h"
+#include "mbuffer.h"
+
+/* Round size up to the next multiple of 16 (values already a multiple are unchanged). */
+#define ROUNDUP16(size) (((size)+15) & ~15)
+
+#if !defined(USEMMX)
+ /*!
+ ************************************************************************
+ * \brief
+ *    Build the row-pointer tables of the top and bottom field so that
+ *    they alias the even and odd lines of an existing frame. No sample
+ *    memory is allocated, only the two pointer tables of dim0/2 entries.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int init_top_bot_planes(imgpel **imgFrame, int dim0, imgpel ***imgTopField, imgpel ***imgBotField)
+{
+  int line;
+
+  *imgTopField = (imgpel**) malloc((dim0>>1) * sizeof(imgpel*));
+  if (*imgTopField == NULL)
+    no_mem_exit("init_top_bot_planes: imgTopField");
+
+  *imgBotField = (imgpel**) malloc((dim0>>1) * sizeof(imgpel*));
+  if (*imgBotField == NULL)
+    no_mem_exit("init_top_bot_planes: imgBotField");
+
+  // Even frame lines belong to the top field, odd lines to the bottom field
+  line = 0;
+  while (line < (dim0>>1))
+  {
+    (*imgTopField)[line] = imgFrame[2 * line];
+    (*imgBotField)[line] = imgFrame[2 * line + 1];
+    ++line;
+  }
+
+  return dim0 * sizeof(imgpel*);
+}
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    Release the two field row-pointer tables. The lines they point to
+ *    are not freed here.
+ ************************************************************************/
+void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField)
+{
+  // Only the pointer tables are released, never the sample rows
+  free(imgTopField);
+  free(imgBotField);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 1D memory array -> imgpel array1D[dim0], zero-filled
+ *
+ * \param array1D
+ *    receives the address of the new array
+ * \param dim0
+ *    number of elements
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int get_mem1Dpel(imgpel **array1D, int dim0)
+{
+  if((*array1D = (imgpel*)calloc(dim0, sizeof(imgpel))) == NULL)
+    no_mem_exit("get_mem1Dpel: array1D");
+
+  return (sizeof(imgpel*) + dim0 * sizeof(imgpel));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create imgpel array2D[dim0][dim1]: a table of dim0 row pointers
+ *    plus one zero-filled block of dim0*dim1 samples that the rows
+ *    index into.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int get_mem2Dpel(imgpel ***array2D, int dim0, int dim1)
+{
+  imgpel **rows;
+  int r;
+
+  rows = (imgpel**)malloc(dim0 * sizeof(imgpel*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dpel: array2D");
+
+  rows[0] = (imgpel*)calloc(dim0 * dim1, sizeof(imgpel));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dpel: array2D");
+
+  // Every row starts dim1 samples after the previous one
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return dim0 * (sizeof(imgpel*) + dim1 * sizeof(imgpel));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate a VideoImage with a zero-initialized header, a table of
+ *    height row pointers and one sample buffer the rows index into.
+ *
+ *    With H264_IPP the buffer comes from ippiMalloc_8u_C1 (one extra
+ *    line, stride chosen by IPP); otherwise it is a zero-filled buffer
+ *    whose stride is width rounded up to a multiple of 16.
+ *
+ * \return
+ *    the new image, or 0 if any allocation failed (nothing is leaked
+ *    in that case). Release with free_memImage().
+ ************************************************************************
+ */
+VideoImage *get_memImage(int width, int height)
+{
+  int i, stride;
+  VideoImage *image = (VideoImage *)calloc(1, sizeof(VideoImage));
+
+  if (!image)
+    return 0;
+
+  if((image->img = (imgpel**)malloc(height * sizeof(imgpel*))) == NULL)
+  {
+    free(image);
+    return 0;
+  }
+
+#ifdef H264_IPP
+  image->base_address = (imgpel* )ippiMalloc_8u_C1(width, height+1, &stride); // height+1 so we can deal with overreading
+#else
+  stride = ROUNDUP16(width);
+  image->base_address = (imgpel* )calloc(stride * height, sizeof(imgpel));
+#endif
+  if (!image->base_address)
+  {
+    // Undo the partial construction instead of leaking it
+    free(image->img);
+    free(image);
+    return 0;
+  }
+
+  image->stride = stride;
+
+  for(i = 0 ; i < height; i++)
+    image->img[i] = image->base_address + stride*i;
+
+#ifdef H264_IPP
+  image->next = 0;
+#endif
+
+  return image;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an image obtained from get_memImage(): the row-pointer
+ *    table, the sample buffer (through ippiFree when built with
+ *    H264_IPP) and the header itself. A NULL image is ignored, like in
+ *    the other free_* helpers of this file.
+ ************************************************************************
+ */
+void free_memImage(VideoImage *image)
+{
+  if (!image)
+    return;
+
+  free(image->img);
+  #ifdef H264_IPP
+  ippiFree(image->base_address);
+  #else
+  free(image->base_address);
+  #endif
+  free(image);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create imgpel array3D[dim0][dim1][dim2]: a table of dim0 plane
+ *    pointers on top of a (dim0*dim1) x dim2 matrix obtained from
+ *    get_mem2Dpel().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dpel(imgpel ****array3D, int dim0, int dim1, int dim2)
+{
+  imgpel ***planes;
+  int total = dim0 * sizeof(imgpel**);
+  int p;
+
+  planes = (imgpel***)malloc(dim0 * sizeof(imgpel**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dpel: array3D");
+
+  // planes[0] is filled in by the 2D allocator
+  total += get_mem2Dpel(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; ++p)
+    planes[p] = planes[p - 1] + dim1;
+
+  return total;
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Create imgpel array4D[dim0][dim1][dim2][dim3]: a table of dim0
+ *    pointers on top of a (dim0*dim1) x dim2 x dim3 array obtained from
+ *    get_mem3Dpel().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4Dpel(imgpel *****array4D, int dim0, int dim1, int dim2, int dim3)
+{
+  imgpel ****cubes;
+  int total = dim0 * sizeof(imgpel***);
+  int c;
+
+  cubes = (imgpel****)malloc(dim0 * sizeof(imgpel***));
+  *array4D = cubes;
+  if (cubes == NULL)
+    no_mem_exit("get_mem4Dpel: array4D");
+
+  // cubes[0] is filled in by the 3D allocator
+  total += get_mem3Dpel(cubes, dim0 * dim1, dim2, dim3);
+
+  for (c = 1; c < dim0; ++c)
+    cubes[c] = cubes[c - 1] + dim1;
+
+  return total;
+}
+
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem1Dpel(). A NULL pointer is
+ *    reported through error() with code 100.
+ ************************************************************************
+ */
+void free_mem1Dpel(imgpel *array1D)
+{
+  if (array1D == NULL)
+  {
+    error ("free_mem1Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free (array1D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a matrix obtained from get_mem2Dpel(): first the sample
+ *    block referenced by row 0, then the row-pointer table. A NULL
+ *    table or a NULL sample block is reported through error() with
+ *    code 100.
+ ************************************************************************
+ */
+void free_mem2Dpel(imgpel **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  if (array2D[0] != NULL)
+    free (array2D[0]);
+  else
+    error ("free_mem2Dpel: trying to free unused memory",100);
+
+  free (array2D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem3Dpel(): the underlying 2D
+ *    matrix first, then the plane-pointer table. A NULL pointer is
+ *    reported through error() with code 100.
+ ************************************************************************
+ */
+void free_mem3Dpel(imgpel ***array3D)
+{
+  if (array3D == NULL)
+  {
+    error ("free_mem3Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free_mem2Dpel(array3D[0]);
+  free (array3D);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem4Dpel(): the underlying 3D
+ *    array first, then the top-level pointer table. A NULL pointer is
+ *    reported through error() with code 100.
+ ************************************************************************
+ */
+void free_mem4Dpel(imgpel ****array4D)
+{
+  if (array4D == NULL)
+  {
+    error ("free_mem4Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free_mem3Dpel(array4D[0]);
+  free (array4D);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a 5D imgpel array: the underlying 4D array is handed to
+ *    free_mem4Dpel(), then the top-level pointer table is freed. A NULL
+ *    pointer is reported through error() with code 100.
+ ************************************************************************
+ */
+void free_mem5Dpel(imgpel *****array5D)
+{
+  if (array5D == NULL)
+  {
+    error ("free_mem5Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free_mem4Dpel(array5D[0]);
+  free (array5D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create byte array2D[dim0][dim1]: a table of dim0 row pointers plus
+ *    one zero-filled block of dim0*dim1 bytes that the rows index into.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int get_mem2D(byte ***array2D, int dim0, int dim1)
+{
+  byte **rows;
+  int r;
+
+  rows = (byte**)malloc(dim0 * sizeof(byte*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2D: array2D");
+
+  rows[0] = (byte*)calloc(dim0 * dim1, sizeof(byte));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2D: array2D");
+
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return dim0 * (sizeof(byte*) + dim1 * sizeof(byte));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create int array2D[dim0][dim1]: a table of dim0 row pointers plus
+ *    one zero-filled block of dim0*dim1 ints that the rows index into.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dint(int ***array2D, int dim0, int dim1)
+{
+  int **rows;
+  int r;
+
+  rows = (int**)malloc(dim0 * sizeof(int*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dint: array2D");
+
+  rows[0] = (int*)calloc(dim0 * dim1, sizeof(int));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dint: array2D");
+
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return dim0 * (sizeof(int*) + dim1 * sizeof(int));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> h264_ref_t array2D[dim0][dim1]
+ *
+ *    The element block is obtained with _aligned_malloc (32-byte
+ *    alignment, size rounded up to a multiple of 16 bytes) and cleared;
+ *    the row-pointer table comes from malloc.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+static int get_mem2Dref(h264_ref_t ***array2D, int dim0, int dim1)
+{
+  int i;
+  size_t malloc_size = ROUNDUP16(dim0 * dim1*sizeof(h264_ref_t));
+  // Diagnostics name this function so an allocation failure can be located
+  if((*array2D = (h264_ref_t**)malloc(dim0 * sizeof(h264_ref_t*))) == NULL)
+    no_mem_exit("get_mem2Dref: array2D");
+  if((*(*array2D) = (h264_ref_t* )_aligned_malloc(malloc_size, 32)) == NULL)
+    no_mem_exit("get_mem2Dref: array2D");
+  memset((*array2D)[0], 0, malloc_size);
+
+  for(i = 1; i < dim0; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1;
+
+  return dim0 * (sizeof(h264_ref_t*) + dim1 * sizeof(h264_ref_t));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> PicMotion array2D[dim0][dim1]
+ *
+ *    The element block is obtained with _aligned_malloc (32-byte
+ *    alignment, size rounded up to a multiple of 16 bytes) and cleared.
+ *    The row-pointer table has dim0+1 entries; the extra last entry is
+ *    set to 0.
+ *
+ * \par Output:
+ *    memory size in bytes (the extra table entry is not counted)
+ ************************************************************************
+ */
+int get_mem2DPicMotion(PicMotion ***array2D, int dim0, int dim1)
+{
+  // we allocate with one extra position in the first dimension
+  // so the motion_cache can use it as a next pointer
+  int i;
+  size_t malloc_size = ROUNDUP16(dim0 * dim1*sizeof(PicMotion));
+  if((*array2D = (PicMotion**)malloc((dim0+1) * sizeof(PicMotion*))) == NULL)
+    no_mem_exit("get_mem2DPicMotion: array2D");
+  if((*(*array2D) = (PicMotion* )_aligned_malloc(malloc_size, 32)) == NULL)
+    no_mem_exit("get_mem2DPicMotion: array2D");
+  memset((*array2D)[0], 0, malloc_size);
+
+  for(i = 1; i < dim0; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1;
+  (*array2D)[dim0] = 0;
+
+  return dim0 * (sizeof(PicMotion*) + dim1 * sizeof(PicMotion));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create byte array3D[dim0][dim1][dim2]: a table of dim0 plane
+ *    pointers on top of a (dim0*dim1) x dim2 matrix obtained from
+ *    get_mem2D().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3D(byte ****array3D, int dim0, int dim1, int dim2)
+{
+  byte ***planes;
+  int total = dim0 * sizeof(byte**);
+  int p;
+
+  planes = (byte***)malloc(dim0 * sizeof(byte**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3D: array3D");
+
+  // planes[0] is filled in by the 2D allocator
+  total += get_mem2D(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; ++p)
+    planes[p] = planes[p - 1] + dim1;
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create byte array4D[dim0][dim1][dim2][dim3]: a table of dim0
+ *    pointers on top of a (dim0*dim1) x dim2 x dim3 array obtained from
+ *    get_mem3D().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4D(byte *****array4D, int dim0, int dim1, int dim2, int dim3)
+{
+  byte ****cubes;
+  int total = dim0 * sizeof(byte***);
+  int c;
+
+  cubes = (byte****)malloc(dim0 * sizeof(byte***));
+  *array4D = cubes;
+  if (cubes == NULL)
+    no_mem_exit("get_mem4D: array4D");
+
+  // cubes[0] is filled in by the 3D allocator
+  total += get_mem3D(cubes, dim0 * dim1, dim2, dim3);
+
+  for (c = 1; c < dim0; ++c)
+    cubes[c] = cubes[c - 1] + dim1;
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create int array3D[dim0][dim1][dim2]: a table of dim0 plane
+ *    pointers on top of a (dim0*dim1) x dim2 matrix obtained from
+ *    get_mem2Dint().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dint(int ****array3D, int dim0, int dim1, int dim2)
+{
+  int ***planes;
+  int total = dim0 * sizeof(int**);
+  int p;
+
+  planes = (int***)malloc(dim0 * sizeof(int**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dint: array3D");
+
+  // planes[0] is filled in by the 2D allocator
+  total += get_mem2Dint(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; ++p)
+    planes[p] = planes[p - 1] + dim1;
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> h264_ref_t array3D[dim0][dim1][dim2]
+ *
+ *    A table of dim0 plane pointers is placed on top of a
+ *    (dim0*dim1) x dim2 matrix obtained from get_mem2Dref().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dref(h264_ref_t ****array3D, int dim0, int dim1, int dim2)
+{
+  int i, mem_size = dim0 * sizeof(h264_ref_t**);
+
+  // Diagnostic names this function so an allocation failure can be located
+  if(((*array3D) = (h264_ref_t***)malloc(dim0 * sizeof(h264_ref_t**))) == NULL)
+    no_mem_exit("get_mem3Dref: array3D");
+
+  mem_size += get_mem2Dref(*array3D, dim0 * dim1, dim2);
+
+  for(i = 1; i < dim0; i++)
+    (*array3D)[i] = (*array3D)[i-1] + dim1;
+
+  return mem_size;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create int array4D[dim0][dim1][dim2][dim3]: a table of dim0
+ *    pointers on top of a (dim0*dim1) x dim2 x dim3 array obtained from
+ *    get_mem3Dint().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4Dint(int *****array4D, int dim0, int dim1, int dim2, int dim3)
+{
+  int ****cubes;
+  int total = dim0 * sizeof(int***);
+  int c;
+
+  cubes = (int****)malloc(dim0 * sizeof(int***));
+  *array4D = cubes;
+  if (cubes == NULL)
+    no_mem_exit("get_mem4Dint: array4D");
+
+  // cubes[0] is filled in by the 3D allocator
+  total += get_mem3Dint(cubes, dim0 * dim1, dim2, dim3);
+
+  for (c = 1; c < dim0; ++c)
+    cubes[c] = cubes[c - 1] + dim1;
+
+  return total;
+}
+
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a matrix obtained from get_mem2D(): the element block
+ *    referenced by row 0, then the row-pointer table. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem2D(byte **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a matrix obtained from get_mem2Dint(): the element block
+ *    referenced by row 0, then the row-pointer table. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem2Dint(int **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a h264_ref_t matrix: the element block referenced by row 0
+ *    goes back through _aligned_free, the row-pointer table through
+ *    free. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem2Dref(h264_ref_t **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a PicMotion matrix: the element block referenced by row 0
+ *    goes back through _aligned_free, the row-pointer table through
+ *    free. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem2DPicMotion(PicMotion **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem3D(): the underlying 2D
+ *    matrix first, then the plane-pointer table. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem3D(byte ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2D(array3D[0]);
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem4D(): the underlying 3D
+ *    array first, then the top-level pointer table. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem4D(byte ****array4D)
+{
+  if (array4D == NULL)
+    return;
+
+  free_mem3D(array4D[0]);
+  free (array4D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem3Dint(): the underlying 2D
+ *    matrix first, then the plane-pointer table. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem3Dint(int ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2Dint(array3D[0]);
+  free (array3D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem3Dref(): the underlying 2D
+ *    matrix first, then the plane-pointer table. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem3Dref(h264_ref_t ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2Dref(array3D[0]);
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a 3D PicMotion array: the underlying 2D matrix is handed
+ *    to free_mem2DPicMotion(), then the plane-pointer table is freed.
+ *    NULL is ignored.
+ ************************************************************************
+ */
+void free_mem3DPicMotion(PicMotion ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2DPicMotion(array3D[0]);
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem4Dint(): the underlying 3D
+ *    array first, then the top-level pointer table. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem4Dint(int ****array4D)
+{
+  if (array4D == NULL)
+    return;
+
+  free_mem3Dint(array4D[0]);
+  free (array4D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Report a failed memory allocation: formats a message into
+ *    errortext and passes it to error() with code 100.
+ * \param where
+ *    string naming the allocation that failed
+ ************************************************************************
+ */
+void no_mem_exit(char *where)
+{
+  snprintf(errortext, ET_SIZE,
+           "Could not allocate memory: %s",
+           where);
+
+  error(errortext, 100);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create uint16 array2D[dim0][dim1]: a table of dim0 row pointers
+ *    plus one zero-filled block of dim0*dim1 elements that the rows
+ *    index into.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Duint16(uint16 ***array2D, int dim0, int dim1)
+{
+  uint16 **rows;
+  int r;
+
+  rows = (uint16**)malloc(dim0 * sizeof(uint16*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Duint16: array2D");
+
+  rows[0] = (uint16*)calloc(dim0 * dim1, sizeof(uint16));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Duint16: array2D");
+
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return dim0 * (sizeof(uint16*) + dim1 * sizeof(uint16));
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create short array2D[dim0][dim1]. The element block comes from
+ *    _aligned_malloc (32-byte alignment, byte size rounded up to a
+ *    multiple of 16) and is cleared; the row-pointer table comes from
+ *    malloc.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dshort(short ***array2D, int dim0, int dim1)
+{
+  const size_t block_bytes = ROUNDUP16(dim0 * dim1*sizeof(short));
+  short **rows;
+  int r;
+
+  rows = (short**)malloc(dim0 * sizeof(short*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dshort: array2D");
+
+  rows[0] = (short*)_aligned_malloc(block_bytes, 32);
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dshort: array2D");
+  memset(rows[0], 0, block_bytes);
+
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return dim0 * (sizeof(short*) + dim1 * sizeof(short));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create short array3D[dim0][dim1][dim2]: a table of dim0 plane
+ *    pointers on top of a (dim0*dim1) x dim2 matrix obtained from
+ *    get_mem2Dshort().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dshort(short ****array3D,int dim0, int dim1, int dim2)
+{
+  short ***planes;
+  int total = dim0 * sizeof(short**);
+  int p;
+
+  planes = (short***)malloc(dim0 * sizeof(short**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dshort: array3D");
+
+  // planes[0] is filled in by the 2D allocator
+  total += get_mem2Dshort(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; ++p)
+    planes[p] = planes[p - 1] + dim1;
+
+  return total;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create MotionVector array2D[dim0][dim1]. The element block comes
+ *    from _aligned_malloc (32-byte alignment; the element count is
+ *    rounded up to a multiple of 16 before being scaled by the element
+ *    size) and is cleared; the row-pointer table comes from malloc.
+ *
+ * \return
+ *    the row-pointer table, or 0 if an allocation failed (nothing is
+ *    leaked in that case)
+ ************************************************************************
+ */
+static MotionVector **get_mem2DMotionVector(int dim0, int dim1)
+{
+  const size_t block_bytes = ROUNDUP16(dim0 * dim1)*sizeof(MotionVector);
+  MotionVector **rows;
+  MotionVector *block;
+  int r;
+
+  rows = (MotionVector**)malloc(dim0 * sizeof(MotionVector*));
+  if (rows == NULL)
+    return 0;
+
+  block = (MotionVector*)_aligned_malloc(block_bytes, 32);
+  rows[0] = block;
+  if (block == NULL)
+  {
+    free(rows);
+    return 0;
+  }
+  memset(block, 0, block_bytes);
+
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return rows;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create MotionVector array3D[dim0][dim1][dim2]: a table of dim0
+ *    plane pointers on top of a (dim0*dim1) x dim2 matrix obtained from
+ *    get_mem2DMotionVector().
+ *
+ * \return
+ *    the plane-pointer table, or 0 if an allocation failed
+ ************************************************************************
+ */
+MotionVector ***get_mem3DMotionVector(int dim0, int dim1, int dim2)
+{
+  MotionVector ***planes;
+  int p;
+
+  planes = (MotionVector***)malloc(dim0 * sizeof(MotionVector **));
+  if (planes == NULL)
+    return 0;
+
+  planes[0] = get_mem2DMotionVector(dim0 * dim1, dim2);
+  if (planes[0] == NULL)
+  {
+    free(planes);
+    return 0;
+  }
+
+  for (p = 1; p < dim0; ++p)
+    planes[p] = planes[p - 1] + dim1;
+
+  return planes;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a matrix obtained from get_mem2Duint16(): the element
+ *    block referenced by row 0, then the row-pointer table. NULL is
+ *    ignored.
+ ************************************************************************
+ */
+void free_mem2Duint16(uint16 **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a matrix obtained from get_mem2Dshort(): the element block
+ *    referenced by row 0 goes back through _aligned_free, the
+ *    row-pointer table through free. NULL is ignored.
+ ************************************************************************
+ */
+void free_mem2Dshort(short **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a matrix obtained from get_mem2DMotionVector(): the
+ *    element block referenced by row 0 goes back through _aligned_free,
+ *    the row-pointer table through free. NULL is ignored.
+ ************************************************************************
+ */
+
+static void free_mem2DMotionVector(MotionVector **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free(array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release an array obtained from get_mem3DMotionVector(): the
+ *    underlying 2D matrix first, then the plane-pointer table. NULL is
+ *    ignored.
+ ************************************************************************
+ */
+void free_mem3DMotionVector(MotionVector ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2DMotionVector(array3D[0]);
+  free (array3D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create double array2D[dim0][dim1]: a table of dim0 row pointers
+ *    plus one zero-filled block of dim0*dim1 doubles that the rows
+ *    index into.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Ddouble(double ***array2D, int dim0, int dim1)
+{
+  double **rows;
+  int r;
+
+  rows = (double**)malloc(dim0 * sizeof(double*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Ddouble: array2D");
+
+  rows[0] = (double*)calloc(dim0 * dim1, sizeof(double));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Ddouble: array2D");
+
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return dim0 * (sizeof(double*) + dim1 * sizeof(double));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Create double array2D[dim0][dim1] whose row pointers are advanced
+ *    by offset elements, so that column indices from -offset upwards
+ *    address the allocated block.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dodouble(double ***array2D, int dim0, int dim1, int offset)
+{
+  double **rows;
+  int r;
+
+  rows = (double**)malloc(dim0 * sizeof(double*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dodouble: array2D");
+
+  rows[0] = (double*)calloc(dim0 * dim1, sizeof(double));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dodouble: array2D");
+
+  // Shift row 0; the remaining rows inherit the shift below
+  rows[0] += offset;
+
+  for (r = 1; r < dim0; ++r)
+    rows[r] = rows[r - 1] + dim1;
+
+  return dim0 * (sizeof(double*) + dim1 * sizeof(double));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate a 3D array of doubles -> double array3D[dim0][dim1][dim2].
+ *    The second dimension is shifted by offset, so valid second indices
+ *    run from -offset to dim1 - offset - 1. Each [i][j] line of dim2
+ *    doubles is a separate zero-initialised allocation.
+ *    Release with free_mem3Dodouble() using the same dim0/dim1/offset.
+ *
+ * \par Output:
+ *    dim0 * (sizeof(double**) + dim1 * (sizeof(double*) + dim2 * sizeof(double)))
+ ************************************************************************
+ */
+int get_mem3Dodouble(double ****array3D, int dim0, int dim1, int dim2, int offset)
+{
+  double ***planes;
+  int plane, col;
+
+  planes = (double***)malloc(dim0 * sizeof(double**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dodouble: array3D");
+
+  planes[0] = (double**)calloc(dim0 * dim1, sizeof(double*));
+  if (planes[0] == NULL)
+    no_mem_exit("get_mem3Dodouble: array3D");
+
+  /* Shift the second-level table; later planes inherit the shift. */
+  planes[0] += offset;
+
+  for (plane = 1; plane < dim0; plane++)
+    planes[plane] = planes[plane - 1] + dim1;
+
+  for (plane = 0; plane < dim0; plane++)
+  {
+    for (col = -offset; col < dim1 - offset; col++)
+    {
+      planes[plane][col] = (double*)calloc(dim2, sizeof(double));
+      if (planes[plane][col] == NULL)
+        no_mem_exit("get_mem3Dodouble: array3D");
+    }
+  }
+
+  return dim0*( sizeof(double**) + dim1 * ( sizeof(double*) + dim2 * sizeof(double)));
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> short array2D[dim0][dim1]
+ *    Row 0 is positioned offset_y rows and offset_x columns into the
+ *    zero-initialised element block, so that negative row/column
+ *    indices are meant to address the leading border.
+ *    Release with free_offset_mem2Dshort() using the same dim1,
+ *    offset_y and offset_x.
+ *
+ * \par Output:
+ *    memory size in bytes: dim0 * (sizeof(short*) + dim1 * sizeof(short))
+ ************************************************************************
+ */
+int get_offset_mem2Dshort(short ***array2D, int dim0, int dim1, int offset_y, int offset_x)
+{
+  int i;
+
+  if((*array2D = (short**)malloc(dim0 * sizeof(short*))) == NULL)
+    no_mem_exit("get_offset_mem2Dshort: array2D");
+
+  if(((*array2D)[0] = (short* )calloc(dim0 * dim1, sizeof(short))) == NULL)
+    no_mem_exit("get_offset_mem2Dshort: array2D");
+  (*array2D)[0] += offset_x + offset_y * dim1;
+
+  // NOTE(review): the row-pointer table has dim0 entries starting at index 0
+  // and is not shifted by offset_y, so for offset_y > 0 these stores land
+  // before the start of the table. Fixing this requires a matching change in
+  // free_offset_mem2Dshort() (which passes the table pointer to free()), so
+  // it is left as-is here.
+  for(i=-1 ; i > -offset_y - 1; i--)
+  {
+    (*array2D)[i] = (*array2D)[i+1] - dim1;
+  }
+
+  // Rows are counted by dim0, not by the column count dim1: the rows that
+  // follow row 0 inside the element block are 1 .. dim0 - offset_y - 1.
+  for(i=1 ; i < dim0 - offset_y; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1;
+
+  return dim0 * (sizeof(short*) + dim1 * sizeof(short));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate a 3D array of ints -> int array3D[dim0][dim1][dim2].
+ *    The second dimension is shifted by offset, so valid second indices
+ *    run from -offset to dim1 - offset - 1. Each [i][j] line of dim2
+ *    ints is a separate zero-initialised allocation.
+ *    Release with free_mem3Doint() using the same dim0/dim1/offset.
+ *
+ * \par Output:
+ *    dim0 * (sizeof(int**) + dim1 * (sizeof(int*) + dim2 * sizeof(int)))
+ ************************************************************************
+ */
+int get_mem3Doint(int ****array3D, int dim0, int dim1, int dim2, int offset)
+{
+  int ***planes;
+  int plane, col;
+
+  planes = (int***)malloc(dim0 * sizeof(int**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Doint: array3D");
+
+  planes[0] = (int**)calloc(dim0 * dim1, sizeof(int*));
+  if (planes[0] == NULL)
+    no_mem_exit("get_mem3Doint: array3D");
+
+  /* Shift the second-level table; later planes inherit the shift. */
+  planes[0] += offset;
+
+  for (plane = 1; plane < dim0; plane++)
+    planes[plane] = planes[plane - 1] + dim1;
+
+  for (plane = 0; plane < dim0; plane++)
+  {
+    for (col = -offset; col < dim1 - offset; col++)
+    {
+      planes[plane][col] = (int*)calloc(dim2, sizeof(int));
+      if (planes[plane][col] == NULL)
+        no_mem_exit("get_mem3Doint: array3D");
+    }
+  }
+
+  return dim0 * (sizeof(int**) + dim1 * (sizeof(int*) + dim2 * sizeof(int)));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> int array2D[dim0][dim1]
+ *    Every row pointer is advanced by offset elements, so column
+ *    indices from -offset upwards address the row.
+ *    Release with free_mem2Doint() using the same offset.
+ *
+ * \par Output:
+ *    memory size in bytes: dim0 * (sizeof(int*) + dim1 * sizeof(int))
+ ************************************************************************
+ */
+int get_mem2Doint(int ***array2D, int dim0, int dim1, int offset)
+{
+  int i;
+
+  // The out-of-memory messages name this function (they previously said
+  // "get_mem2Dint", pointing at a different allocator).
+  if((*array2D = (int**)malloc(dim0 * sizeof(int*))) == NULL)
+    no_mem_exit("get_mem2Doint: array2D");
+  if(((*array2D)[0] = (int* )calloc(dim0 * dim1, sizeof(int))) == NULL)
+    no_mem_exit("get_mem2Doint: array2D");
+
+  // Shift row 0; the following rows inherit the shift.
+  (*array2D)[0] += offset;
+
+  for(i=1 ; i<dim0 ; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1 ;
+
+  return dim0 * (sizeof(int*) + dim1 * sizeof(int));
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate a 3D array of doubles -> double array3D[dim0][dim1][dim2].
+ *    Built from one top-level table of dim0 entries plus a single
+ *    get_mem2Ddouble() allocation of (dim0 * dim1) rows of dim2 doubles;
+ *    entry j of the top-level table points at row j * dim1.
+ *
+ * \par Output:
+ *    size of the top-level table plus the value returned by
+ *    get_mem2Ddouble()
+ ************************************************************************
+ */
+int get_mem3Ddouble(double ****array3D, int dim0, int dim1, int dim2)
+{
+  double **rows;
+  int plane;
+  int total_bytes = dim0 * sizeof(double**);
+
+  *array3D = (double***)malloc(dim0 * sizeof(double**));
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3Ddouble: array3D");
+
+  total_bytes += get_mem2Ddouble(&rows, dim0 * dim1, dim2);
+
+  /* Slice the flat row table into dim0 groups of dim1 rows. */
+  for (plane = 0; plane < dim0; plane++)
+    (*array3D)[plane] = rows + plane * dim1;
+
+  return total_bytes;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D double array allocated with get_mem2Ddouble().
+ *    error() is called (code 100) when the table or its first row
+ *    pointer is NULL.
+ ************************************************************************
+ */
+void free_mem2Ddouble(double **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Ddouble: trying to free unused memory",100);
+    return;
+  }
+
+  /* Row 0 holds the address of the element block. */
+  if (array2D[0] == NULL)
+    error ("free_mem2Ddouble: trying to free unused memory",100);
+  else
+    free (array2D[0]);
+
+  free (array2D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D double array with column offset, as allocated with
+ *    get_mem2Dodouble(). The offset must match the one used at
+ *    allocation so row 0 can be moved back before it is freed.
+ ************************************************************************
+ */
+void free_mem2Dodouble(double **array2D, int offset)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Dodouble: trying to free unused memory",100);
+    return;
+  }
+
+  /* Undo the shift applied to row 0 at allocation time. */
+  array2D[0] -= offset;
+
+  if (array2D[0] == NULL)
+    error ("free_mem2Dodouble: trying to free unused memory",100);
+  else
+    free (array2D[0]);
+
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D double array with offset, as allocated with
+ *    get_mem3Dodouble(). dim0, dim1 and offset must match the values
+ *    used at allocation: every [i][j] line is freed individually, then
+ *    the shifted second-level table, then the top-level table.
+ ************************************************************************
+ */
+void free_mem3Dodouble(double ***array3D, int dim0, int dim1, int offset)
+{
+  int plane, col;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3Dodouble: trying to free unused memory",100);
+    return;
+  }
+
+  for (plane = 0; plane < dim0; plane++)
+  {
+    for (col = -offset; col < dim1 - offset; col++)
+    {
+      double *line = array3D[plane][col];
+
+      if (line == NULL)
+        error ("free_mem3Dodouble: trying to free unused memory",100);
+      else
+        free(line);
+    }
+  }
+
+  /* Undo the shift on the second-level table before releasing it. */
+  array3D[0] -= offset;
+  if (array3D[0] == NULL)
+    error ("free_mem3Dodouble: trying to free unused memory",100);
+  else
+    free(array3D[0]);
+
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D int array with offset, as allocated with
+ *    get_mem3Doint(). dim0, dim1 and offset must match the values
+ *    used at allocation: every [i][j] line is freed individually, then
+ *    the shifted second-level table, then the top-level table.
+ ************************************************************************
+ */
+void free_mem3Doint(int ***array3D, int dim0, int dim1, int offset)
+{
+  int plane, col;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3Doint: trying to free unused memory",100);
+    return;
+  }
+
+  for (plane = 0; plane < dim0; plane++)
+  {
+    for (col = -offset; col < dim1 - offset; col++)
+    {
+      int *line = array3D[plane][col];
+
+      if (line == NULL)
+        error ("free_mem3Doint: trying to free unused memory",100);
+      else
+        free(line);
+    }
+  }
+
+  /* Undo the shift on the second-level table before releasing it. */
+  array3D[0] -= offset;
+  if (array3D[0] == NULL)
+    error ("free_mem3Doint: trying to free unused memory",100);
+  else
+    free(array3D[0]);
+
+  free (array3D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D int array with column offset, as allocated with
+ *    get_mem2Doint(). The offset must match the one used at
+ *    allocation so row 0 can be moved back before it is freed.
+ ************************************************************************
+ */
+void free_mem2Doint(int **array2D, int offset)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Doint: trying to free unused memory",100);
+    return;
+  }
+
+  /* Undo the shift applied to row 0 at allocation time. */
+  array2D[0] -= offset;
+
+  if (array2D[0] == NULL)
+    error ("free_mem2Doint: trying to free unused memory",100);
+  else
+    free (array2D[0]);
+
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D short array with row/column offset, as allocated with
+ *    get_offset_mem2Dshort(). dim1, offset_y and offset_x must match
+ *    the allocation so row 0 can be moved back to the start of the
+ *    element block before it is freed.
+ ************************************************************************
+ */
+void free_offset_mem2Dshort(short **array2D, int dim1, int offset_y, int offset_x)
+{
+  if (array2D == NULL)
+  {
+    error ("free_offset_mem2Dshort: trying to free unused memory",100);
+    return;
+  }
+
+  /* Undo the combined row/column shift applied to row 0. */
+  array2D[0] -= offset_x + offset_y * dim1;
+
+  if (array2D[0] == NULL)
+    error ("free_offset_mem2Dshort: trying to free unused memory",100);
+  else
+    free (array2D[0]);
+
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D double memory array
+ *    which was allocated with get_mem3Ddouble()
+ *    The first top-level entry is the row table obtained from
+ *    get_mem2Ddouble(), so it is released through free_mem2Ddouble()
+ *    before the top-level table itself is freed.
+ ************************************************************************
+ */
+void free_mem3Ddouble(double ***array3D)
+{
+  if (array3D)
+  {
+    free_mem2Ddouble(*array3D);
+    free (array3D);
+  }
+  else
+  {
+    // Report under this function's own name, like the sibling free routines.
+    error ("free_mem3Ddouble: trying to free unused memory",100);
+  }
+}
+
+
+#endif
diff --git a/Src/h264dec/lcommon/src/memcache.c b/Src/h264dec/lcommon/src/memcache.c
new file mode 100644
index 00000000..ce3b29d1
--- /dev/null
+++ b/Src/h264dec/lcommon/src/memcache.c
@@ -0,0 +1,106 @@
+#include "memcache.h"
+#include "mbuffer.h"
+#include "memalloc.h"
+
+/* Release every cached image with free_memImage() and reset the cached
+ * dimensions to 0 x 0. */
+void image_cache_flush(ImageCache *cache)
+{
+  for (;;)
+  {
+    VideoImage *victim = cache->head;
+    VideoImage *rest;
+
+    if (!victim)
+      break;
+
+    /* Read the link before the node is released. */
+    rest = victim->next;
+    free_memImage(victim);
+    cache->head = rest;
+  }
+
+  cache->size_x = 0;
+  cache->size_y = 0;
+}
+
+/* Record new dimensions for the cache. When they differ from the stored
+ * ones, the cache is flushed first. */
+void image_cache_set_dimensions(ImageCache *cache, int width, int height)
+{
+  /* Same size as before: keep the cached images. */
+  if (width == cache->size_x && height == cache->size_y)
+    return;
+
+  image_cache_flush(cache);
+  cache->size_x = width;
+  cache->size_y = height;
+}
+
+/* Return 1 when width/height equal the dimensions stored in the cache,
+ * 0 otherwise. */
+int image_cache_dimensions_match(ImageCache *cache, int width, int height)
+{
+  return (width == cache->size_x && height == cache->size_y);
+}
+
+/* Push image onto the front of the cache's singly linked list. */
+void image_cache_add(ImageCache *cache, VideoImage *image)
+{
+  VideoImage *previous_head = cache->head;
+
+  image->next = previous_head;
+  cache->head = image;
+}
+
+/* Pop the front image from the cache, clearing its next link.
+ * Returns 0 when the cache is empty. */
+struct video_image *image_cache_get(ImageCache *cache)
+{
+  VideoImage *front = cache->head;
+
+  if (!front)
+    return 0;
+
+  cache->head = front->next;
+  front->next = 0;
+  return front;
+}
+
+/* -------------
+
+PicMotion arrays are allocated with one extra slot in the first dimension
+(index size_y), which we use as the next pointer of the cache's linked list
+------------- */
+
+
+/* Release every cached PicMotion array with free_mem2DPicMotion() and
+ * reset the cached dimensions to 0 x 0. The link to the next cached array
+ * is stored in slot [size_y] of each array. */
+void motion_cache_flush(MotionCache *cache)
+{
+  for (;;)
+  {
+    PicMotion **victim = cache->head;
+    PicMotion **rest;
+
+    if (!victim)
+      break;
+
+    /* Read the link before the array is released. */
+    rest = (PicMotion **)victim[cache->size_y];
+    free_mem2DPicMotion(victim);
+    cache->head = rest;
+  }
+
+  cache->size_x = 0;
+  cache->size_y = 0;
+}
+
+/* Record new dimensions for the cache. When they differ from the stored
+ * ones, the cache is flushed first. */
+void motion_cache_set_dimensions(MotionCache *cache, int width, int height)
+{
+  /* Same size as before: keep the cached arrays. */
+  if (width == cache->size_x && height == cache->size_y)
+    return;
+
+  motion_cache_flush(cache);
+  cache->size_x = width;
+  cache->size_y = height;
+}
+
+/* Return 1 when width/height equal the dimensions stored in the cache,
+ * 0 otherwise. */
+int motion_cache_dimensions_match(MotionCache *cache, int width, int height)
+{
+  return (width == cache->size_x && height == cache->size_y);
+}
+
+/* Push a PicMotion array onto the front of the cache. The previous head
+ * is stored in the array's slot [size_y]. */
+void motion_cache_add(MotionCache *cache, PicMotion **image)
+{
+  PicMotion **previous_head = cache->head;
+
+  image[cache->size_y] = (PicMotion *)previous_head;
+  cache->head = image;
+}
+
+/* Pop the front PicMotion array from the cache, clearing the link kept in
+ * its slot [size_y]. Returns 0 when the cache is empty. */
+struct pic_motion **motion_cache_get(MotionCache *cache)
+{
+  PicMotion **front = cache->head;
+
+  if (!front)
+    return 0;
+
+  cache->head = (PicMotion **)front[cache->size_y];
+  front[cache->size_y] = 0;
+  return front;
+}
diff --git a/Src/h264dec/lcommon/src/mv_prediction.c b/Src/h264dec/lcommon/src/mv_prediction.c
new file mode 100644
index 00000000..b4638d6d
--- /dev/null
+++ b/Src/h264dec/lcommon/src/mv_prediction.c
@@ -0,0 +1,250 @@
+/*!
+ *************************************************************************************
+ * \file mv_prediction.c
+ *
+ * \brief
+ * Motion Vector Prediction Functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ * - Karsten Sühring <suehring@hhi.de>
+ *************************************************************************************
+ */
+
+#include "global.h"
+#include "mbuffer.h"
+/*!
+ ************************************************************************
+ * \brief
+ * Get motion vector predictor (variant installed by
+ * init_motion_vector_prediction() when mb_aff_frame_flag is set).
+ *
+ * Neighbor reference indices and vertical motion vector components
+ * are rescaled when the neighbor's mb_field flag differs from the
+ * current macroblock's before they are compared / combined.
+ *
+ * \param currMB current macroblock (mb_field and p_Vid->mb_data are read)
+ * \param block neighbors: [0] left, [1] up, [2] upper right
+ * \param pmv output: predicted vector, pmv[0] and pmv[1]
+ * \param ref_frame reference index the prediction is made for
+ * \param motion motion data, indexed [pos_y][pos_x]
+ * \param mb_x compared against 0 for 8x16 partitions
+ * \param mb_y compared against 0 for 16x8 partitions
+ * \param blockshape_x partition width (8 and 16 are special-cased)
+ * \param blockshape_y partition height (8 and 16 are special-cased)
+ ************************************************************************
+ */
+static void GetMotionVectorPredictorMBAFF (Macroblock *currMB,
+ PixelPos *block, // <--> block neighbors
+ short pmv[2],
+ short ref_frame,
+ PicMotion **motion,
+ int mb_x,
+ int mb_y,
+ int blockshape_x,
+ int blockshape_y)
+{
+ int mv_a, mv_b, mv_c, pred_vec=0;
+ int mvPredType, rFrameL, rFrameU, rFrameUR;
+ int hv;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ mvPredType = MVPRED_MEDIAN;
+
+
+ // Reference index of each neighbor (-1 when unavailable), rescaled to the
+ // current macroblock's mode: a frame neighbor's index is doubled for a
+ // field macroblock, a field neighbor's index is halved for a frame one.
+ if (currMB->mb_field)
+ {
+ rFrameL = block[0].available
+ ? (p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].ref_idx
+ : motion[block[0].pos_y][block[0].pos_x].ref_idx * 2) : -1;
+ rFrameU = block[1].available
+ ? (p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].ref_idx
+ : motion[block[1].pos_y][block[1].pos_x].ref_idx * 2) : -1;
+ rFrameUR = block[2].available
+ ? (p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].ref_idx
+ : motion[block[2].pos_y][block[2].pos_x].ref_idx * 2) : -1;
+ }
+ else
+ {
+ rFrameL = block[0].available
+ ? (p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].ref_idx >>1
+ : motion[block[0].pos_y][block[0].pos_x].ref_idx) : -1;
+ rFrameU = block[1].available
+ ? (p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].ref_idx >>1
+ : motion[block[1].pos_y][block[1].pos_x].ref_idx) : -1;
+ rFrameUR = block[2].available
+ ? (p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].ref_idx >>1
+ : motion[block[2].pos_y][block[2].pos_x].ref_idx) : -1;
+ }
+
+
+ /* Prediction if only one of the neighbors uses the reference frame
+ * we are checking
+ */
+ if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame)
+ mvPredType = MVPRED_L;
+ else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame)
+ mvPredType = MVPRED_U;
+ else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame)
+ mvPredType = MVPRED_UR;
+ // Directional predictions
+ // These run after the selection above and override it when they match.
+ if(blockshape_x == 8 && blockshape_y == 16)
+ {
+ if(mb_x == 0)
+ {
+ if(rFrameL == ref_frame)
+ mvPredType = MVPRED_L;
+ }
+ else
+ {
+ if( rFrameUR == ref_frame)
+ mvPredType = MVPRED_UR;
+ }
+ }
+ else if(blockshape_x == 16 && blockshape_y == 8)
+ {
+ if(mb_y == 0)
+ {
+ if(rFrameU == ref_frame)
+ mvPredType = MVPRED_U;
+ }
+ else
+ {
+ if(rFrameL == ref_frame)
+ mvPredType = MVPRED_L;
+ }
+ }
+
+ // One pass per vector component: hv == 0 writes pmv[0], hv == 1 writes pmv[1].
+ for (hv=0; hv < 2; hv++)
+ {
+ if (hv == 0)
+ {
+ // Component 0 is taken as-is; unavailable neighbors contribute 0.
+ mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[hv] : 0;
+ mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[hv] : 0;
+ mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[hv] : 0;
+ }
+ else
+ {
+ // Component 1 is rescaled when the neighbor's mb_field differs from the
+ // current macroblock's: halved for a frame neighbor of a field
+ // macroblock, doubled for a field neighbor of a frame macroblock.
+ if (currMB->mb_field)
+ {
+ mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].mv[hv]
+ : motion[block[0].pos_y][block[0].pos_x].mv[hv] / 2
+ : 0;
+ mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].mv[hv]
+ : motion[block[1].pos_y][block[1].pos_x].mv[hv] / 2
+ : 0;
+ mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].mv[hv]
+ : motion[block[2].pos_y][block[2].pos_x].mv[hv] / 2
+ : 0;
+ }
+ else
+ {
+ mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].mv[hv] * 2
+ : motion[block[0].pos_y][block[0].pos_x].mv[hv]
+ : 0;
+ mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].mv[hv] * 2
+ : motion[block[1].pos_y][block[1].pos_x].mv[hv]
+ : 0;
+ mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].mv[hv] * 2
+ : motion[block[2].pos_y][block[2].pos_x].mv[hv]
+ : 0;
+ }
+ }
+
+ switch (mvPredType)
+ {
+ case MVPRED_MEDIAN:
+ // With neither the up nor the upper-right neighbor available, the left
+ // candidate is used directly.
+ if(!(block[1].available || block[2].available))
+ {
+ pred_vec = mv_a;
+ }
+ else
+ {
+ // Median of three: sum minus smallest minus largest.
+ pred_vec = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c));
+ }
+ break;
+ case MVPRED_L:
+ pred_vec = mv_a;
+ break;
+ case MVPRED_U:
+ pred_vec = mv_b;
+ break;
+ case MVPRED_UR:
+ pred_vec = mv_c;
+ break;
+ default:
+ break;
+ }
+
+ pmv[hv] = (short) pred_vec;
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * Get motion vector predictor (variant installed by
+ * init_motion_vector_prediction() when mb_aff_frame_flag is not set).
+ *
+ * Picks the left, up or upper-right neighbor's vector when the
+ * reference-index / partition-shape tests below select it, and
+ * otherwise the component-wise median of the three candidates.
+ * Unavailable neighbors count as reference index -1 and vector (0,0).
+ *
+ * \param currMB not read by this variant
+ * \param block neighbors: [0] left, [1] up, [2] upper right
+ * \param pmv output: predicted vector, pmv[0] and pmv[1]
+ * \param ref_frame reference index the prediction is made for
+ * \param motion motion data, indexed [pos_y][pos_x]
+ * \param mb_x compared against 0 for 8x16 partitions
+ * \param mb_y compared against 0 for 16x8 partitions
+ * \param blockshape_x partition width (8 and 16 are special-cased)
+ * \param blockshape_y partition height (8 and 16 are special-cased)
+ ************************************************************************
+ */
+// TODO: benski> make SSE3/MMX version
+static void GetMotionVectorPredictorNormal (Macroblock *currMB,
+ PixelPos *block, // <--> block neighbors
+ short pmv[2],
+ short ref_frame,
+ PicMotion **motion,
+ int mb_x,
+ int mb_y,
+ int blockshape_x,
+ int blockshape_y)
+{
+ int rFrameL = block[0].available ? motion[block[0].pos_y][block[0].pos_x].ref_idx : -1;
+ int rFrameU = block[1].available ? motion[block[1].pos_y][block[1].pos_x].ref_idx : -1;
+ int rFrameUR = block[2].available ? motion[block[2].pos_y][block[2].pos_x].ref_idx : -1;
+
+ /* Prediction if only one of the neighbors uses the reference frame
+ * we are checking
+ */
+ // The branches are tested in order left, up, upper right; the first one
+ // whose condition holds supplies the predictor.
+ if (rFrameL == ref_frame &&
+ ((rFrameU != ref_frame && rFrameUR != ref_frame) || (blockshape_x == 8 && blockshape_y == 16 && mb_x == 0) || (blockshape_x == 16 && blockshape_y == 8 && mb_y != 0)))
+ { // left
+ pmv[0] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0;
+ pmv[1] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0;
+ }
+ else if (rFrameU == ref_frame &&
+ ((rFrameL != ref_frame && rFrameUR != ref_frame) || (blockshape_x == 16 && blockshape_y == 8 && mb_y == 0)))
+ { // up
+ pmv[0] = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[0] : 0;
+ pmv[1] = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[1] : 0;
+ }
+ else if (rFrameUR == ref_frame &&
+ ((rFrameL != ref_frame && rFrameU != ref_frame) || (blockshape_x == 8 && blockshape_y == 16 && mb_x != 0)))
+ { // upper right
+ pmv[0] = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[0] : 0;
+ pmv[1] = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[1] : 0;
+ }
+ else
+ { // median
+ // With neither the up nor the upper-right neighbor available, the left
+ // candidate is used directly.
+ if(!(block[1].available || block[2].available))
+ {
+ pmv[0] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0;
+ pmv[1] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0;
+ }
+ else
+ {
+ // Median of three per component: sum minus smallest minus largest.
+ int mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0;
+ int mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[0] : 0;
+ int mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[0] : 0;
+ pmv[0] = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c));
+ mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0;
+ mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[1] : 0;
+ mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[1] : 0;
+ pmv[1] = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c));
+ }
+ }
+}
+
+/* Install the motion vector predictor on currMB: the MBAFF variant when
+ * mb_aff_frame_flag is non-zero, the normal variant otherwise. */
+void init_motion_vector_prediction(Macroblock *currMB, int mb_aff_frame_flag)
+{
+  currMB->GetMVPredictor = mb_aff_frame_flag
+                         ? GetMotionVectorPredictorMBAFF
+                         : GetMotionVectorPredictorNormal;
+}
diff --git a/Src/h264dec/lcommon/src/parsetcommon.c b/Src/h264dec/lcommon/src/parsetcommon.c
new file mode 100644
index 00000000..fe3f0e9a
--- /dev/null
+++ b/Src/h264dec/lcommon/src/parsetcommon.c
@@ -0,0 +1,244 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ * parsetcommon.c
+ * \brief
+ * Picture and Sequence Parameter set generation and handling
+ * \date 25 November 2002
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ *
+ **************************************************************************************
+ */
+
+#include "global.h"
+#include "parsetcommon.h"
+#include "memalloc.h"
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates zero-initialised memory for a picture parameter set.
+ *    no_mem_exit() is called when the allocation fails.
+ *
+ * \return
+ *    pointer to a pps (release with FreePPS())
+ *************************************************************************************
+ */
+
+pic_parameter_set_rbsp_t *AllocPPS ()
+{
+  pic_parameter_set_rbsp_t *pps = calloc (1, sizeof *pps);
+
+  if (pps == NULL)
+    no_mem_exit ("AllocPPS: PPS");
+
+  /* Set explicitly even though calloc() already zeroed the structure. */
+  pps->slice_group_id = NULL;
+  return pps;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates zero-initialised memory for a sequence parameter set.
+ *    no_mem_exit() is called when the allocation fails.
+ *
+ * \return
+ *    pointer to a sps (release with FreeSPS())
+ *************************************************************************************
+ */
+
+seq_parameter_set_rbsp_t *AllocSPS ()
+{
+  seq_parameter_set_rbsp_t *sps = calloc (1, sizeof *sps);
+
+  if (sps == NULL)
+    no_mem_exit ("AllocSPS: SPS");
+
+  return sps;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Frees a picture parameter set together with its slice_group_id array.
+ *
+ * \param pps
+ *    Picture parameter set to be freed (must not be NULL)
+ *************************************************************************************
+ */
+
+void FreePPS (pic_parameter_set_rbsp_t *pps)
+{
+  assert (pps != NULL);
+
+  /* free() accepts NULL, so no separate check is needed. */
+  free (pps->slice_group_id);
+  free (pps);
+}
+
+
+ /*!
+ *************************************************************************************
+ * \brief
+ * Frees a sequence parameter set
+ *
+ * \param sps
+ * Sequence parameter set to be freed (must not be NULL; checked with assert)
+ *************************************************************************************
+ */
+
+ void FreeSPS (seq_parameter_set_rbsp_t *sps)
+ {
+ assert (sps != NULL);
+ free (sps);
+ }
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Compare two sequence parameter sets field by field.
+ *    Conditional fields (POC-type specific values, MBAFF flag, cropping
+ *    rectangle) are only compared when sps1 enables them.
+ *
+ * \return
+ *    1 if both sets are Valid and all compared fields match, 0 otherwise
+ *************************************************************************************
+ */
+int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2)
+{
+  unsigned idx;
+
+  if (!sps1->Valid || !sps2->Valid)
+    return 0;
+
+  if (sps1->profile_idc != sps2->profile_idc) return 0;
+  if (sps1->constrained_set0_flag != sps2->constrained_set0_flag) return 0;
+  if (sps1->constrained_set1_flag != sps2->constrained_set1_flag) return 0;
+  if (sps1->constrained_set2_flag != sps2->constrained_set2_flag) return 0;
+  if (sps1->level_idc != sps2->level_idc) return 0;
+  if (sps1->seq_parameter_set_id != sps2->seq_parameter_set_id) return 0;
+  if (sps1->log2_max_frame_num_minus4 != sps2->log2_max_frame_num_minus4) return 0;
+  if (sps1->pic_order_cnt_type != sps2->pic_order_cnt_type) return 0;
+
+  /* Fields that depend on the picture order count type. */
+  if (sps1->pic_order_cnt_type == 0)
+  {
+    if (sps1->log2_max_pic_order_cnt_lsb_minus4 != sps2->log2_max_pic_order_cnt_lsb_minus4) return 0;
+  }
+  else if (sps1->pic_order_cnt_type == 1)
+  {
+    if (sps1->delta_pic_order_always_zero_flag != sps2->delta_pic_order_always_zero_flag) return 0;
+    if (sps1->offset_for_non_ref_pic != sps2->offset_for_non_ref_pic) return 0;
+    if (sps1->offset_for_top_to_bottom_field != sps2->offset_for_top_to_bottom_field) return 0;
+    if (sps1->num_ref_frames_in_pic_order_cnt_cycle != sps2->num_ref_frames_in_pic_order_cnt_cycle) return 0;
+
+    for (idx = 0; idx < sps1->num_ref_frames_in_pic_order_cnt_cycle; idx++)
+    {
+      if (sps1->offset_for_ref_frame[idx] != sps2->offset_for_ref_frame[idx])
+        return 0;
+    }
+  }
+
+  if (sps1->num_ref_frames != sps2->num_ref_frames) return 0;
+  if (sps1->gaps_in_frame_num_value_allowed_flag != sps2->gaps_in_frame_num_value_allowed_flag) return 0;
+  if (sps1->pic_width_in_mbs_minus1 != sps2->pic_width_in_mbs_minus1) return 0;
+  if (sps1->pic_height_in_map_units_minus1 != sps2->pic_height_in_map_units_minus1) return 0;
+  if (sps1->frame_mbs_only_flag != sps2->frame_mbs_only_flag) return 0;
+
+  if (!sps1->frame_mbs_only_flag)
+  {
+    if (sps1->mb_adaptive_frame_field_flag != sps2->mb_adaptive_frame_field_flag) return 0;
+  }
+
+  if (sps1->direct_8x8_inference_flag != sps2->direct_8x8_inference_flag) return 0;
+  if (sps1->frame_cropping_flag != sps2->frame_cropping_flag) return 0;
+
+  if (sps1->frame_cropping_flag)
+  {
+    if (sps1->frame_cropping_rect_left_offset != sps2->frame_cropping_rect_left_offset) return 0;
+    if (sps1->frame_cropping_rect_right_offset != sps2->frame_cropping_rect_right_offset) return 0;
+    if (sps1->frame_cropping_rect_top_offset != sps2->frame_cropping_rect_top_offset) return 0;
+    if (sps1->frame_cropping_rect_bottom_offset != sps2->frame_cropping_rect_bottom_offset) return 0;
+  }
+
+  if (sps1->vui_parameters_present_flag != sps2->vui_parameters_present_flag) return 0;
+
+  return 1;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Compare two picture parameter sets field by field.
+ *    Slice-group details and scaling lists are only compared when pps1
+ *    enables them.
+ *
+ * \return
+ *    1 if both sets are Valid and all compared fields match, 0 otherwise
+ *************************************************************************************
+ */
+int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2)
+{
+  unsigned idx, coef;
+
+  if (!pps1->Valid || !pps2->Valid)
+    return 0;
+
+  if (pps1->pic_parameter_set_id != pps2->pic_parameter_set_id) return 0;
+  if (pps1->seq_parameter_set_id != pps2->seq_parameter_set_id) return 0;
+  if (pps1->entropy_coding_mode_flag != pps2->entropy_coding_mode_flag) return 0;
+  if (pps1->bottom_field_pic_order_in_frame_present_flag != pps2->bottom_field_pic_order_in_frame_present_flag) return 0;
+  if (pps1->num_slice_groups_minus1 != pps2->num_slice_groups_minus1) return 0;
+
+  /* Slice group description, present only with more than one slice group. */
+  if (pps1->num_slice_groups_minus1>0)
+  {
+    if (pps1->slice_group_map_type != pps2->slice_group_map_type) return 0;
+
+    if (pps1->slice_group_map_type == 0)
+    {
+      for (idx = 0; idx <= pps1->num_slice_groups_minus1; idx++)
+      {
+        if (pps1->run_length_minus1[idx] != pps2->run_length_minus1[idx])
+          return 0;
+      }
+    }
+    else if (pps1->slice_group_map_type == 2)
+    {
+      for (idx = 0; idx < pps1->num_slice_groups_minus1; idx++)
+      {
+        if (pps1->top_left[idx] != pps2->top_left[idx]) return 0;
+        if (pps1->bottom_right[idx] != pps2->bottom_right[idx]) return 0;
+      }
+    }
+    else if (pps1->slice_group_map_type == 3 || pps1->slice_group_map_type==4 || pps1->slice_group_map_type==5)
+    {
+      if (pps1->slice_group_change_direction_flag != pps2->slice_group_change_direction_flag) return 0;
+      if (pps1->slice_group_change_rate_minus1 != pps2->slice_group_change_rate_minus1) return 0;
+    }
+    else if (pps1->slice_group_map_type == 6)
+    {
+      /* The id arrays are walked only once their sizes are known to match. */
+      if (pps1->pic_size_in_map_units_minus1 != pps2->pic_size_in_map_units_minus1) return 0;
+
+      for (idx = 0; idx <= pps1->pic_size_in_map_units_minus1; idx++)
+      {
+        if (pps1->slice_group_id[idx] != pps2->slice_group_id[idx])
+          return 0;
+      }
+    }
+  }
+
+  if (pps1->num_ref_idx_l0_active_minus1 != pps2->num_ref_idx_l0_active_minus1) return 0;
+  if (pps1->num_ref_idx_l1_active_minus1 != pps2->num_ref_idx_l1_active_minus1) return 0;
+  if (pps1->weighted_pred_flag != pps2->weighted_pred_flag) return 0;
+  if (pps1->weighted_bipred_idc != pps2->weighted_bipred_idc) return 0;
+  if (pps1->pic_init_qp_minus26 != pps2->pic_init_qp_minus26) return 0;
+  if (pps1->pic_init_qs_minus26 != pps2->pic_init_qs_minus26) return 0;
+  if (pps1->chroma_qp_index_offset != pps2->chroma_qp_index_offset) return 0;
+  if (pps1->deblocking_filter_control_present_flag != pps2->deblocking_filter_control_present_flag) return 0;
+  if (pps1->constrained_intra_pred_flag != pps2->constrained_intra_pred_flag) return 0;
+  if (pps1->redundant_pic_cnt_present_flag != pps2->redundant_pic_cnt_present_flag) return 0;
+
+  //Fidelity Range Extensions Stuff
+  //It is initialized to zero, so should be ok to check all the time.
+  if (pps1->transform_8x8_mode_flag != pps2->transform_8x8_mode_flag) return 0;
+  if (pps1->pic_scaling_matrix_present_flag != pps2->pic_scaling_matrix_present_flag) return 0;
+
+  if (pps1->pic_scaling_matrix_present_flag)
+  {
+    /* 6 lists, plus 2 more when transform_8x8_mode_flag is 1. */
+    const unsigned list_count = 6 + ((unsigned)pps1->transform_8x8_mode_flag << 1);
+
+    for (idx = 0; idx < list_count; idx++)
+    {
+      if (pps1->pic_scaling_list_present_flag[idx] != pps2->pic_scaling_list_present_flag[idx])
+        return 0;
+
+      if (!pps1->pic_scaling_list_present_flag[idx])
+        continue;
+
+      if (idx < 6)
+      {
+        for (coef = 0; coef < 16; coef++)
+        {
+          if (pps1->ScalingList4x4[idx][coef] != pps2->ScalingList4x4[idx][coef])
+            return 0;
+        }
+      }
+      else
+      {
+        for (coef = 0; coef < 64; coef++)
+        {
+          if (pps1->ScalingList8x8[idx-6][coef] != pps2->ScalingList8x8[idx-6][coef])
+            return 0;
+        }
+      }
+    }
+  }
+
+  if (pps1->second_chroma_qp_index_offset != pps2->second_chroma_qp_index_offset) return 0;
+
+  return 1;
+}
diff --git a/Src/h264dec/lcommon/src/transform.c b/Src/h264dec/lcommon/src/transform.c
new file mode 100644
index 00000000..617ca7c1
--- /dev/null
+++ b/Src/h264dec/lcommon/src/transform.c
@@ -0,0 +1,809 @@
+/*!
+***************************************************************************
+* \file transform.c
+*
+* \brief
+* Transform functions
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Alexis Michael Tourapis
+* \date
+* 01. July 2007
+**************************************************************************
+*/
+#include "global.h"
+#include "transform.h"
+#include <emmintrin.h>
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Forward 4x4 integer transform of one block.
+ *
+ *    Reads the 4x4 window block[pos_y..pos_y+3][pos_x..pos_x+3], transforms
+ *    the rows first and the columns second, and writes the coefficients to
+ *    the same window of tblock. The whole input window is consumed before
+ *    the first output value is written.
+ *
+ * \param block   source samples (array of row pointers)
+ * \param tblock  destination coefficients (array of row pointers)
+ * \param pos_y   first row of the window
+ * \param pos_x   first column of the window
+ ***************************************************************************
+ */
+void forward4x4(int **block, int **tblock, int pos_y, int pos_x)
+{
+  int i, ii;
+  int tmp[16];
+  int *pTmp = tmp, *pblock;
+  int p0,p1,p2,p3;
+  int t0,t1,t2,t3;
+
+  // Horizontal
+  for (i=pos_y; i < pos_y + BLOCK_SIZE; i++)
+  {
+    pblock = &block[i][pos_x];
+    p0 = *(pblock++);
+    p1 = *(pblock++);
+    p2 = *(pblock++);
+    p3 = *(pblock  );
+
+    t0 = p0 + p3;
+    t1 = p1 + p2;
+    t2 = p1 - p2;
+    t3 = p0 - p3;
+
+    // t2 and t3 are differences and can be negative; left-shifting a
+    // negative int is undefined behavior in C, so double by multiplying
+    *(pTmp++) =  t0 + t1;
+    *(pTmp++) = (t3 * 2) + t2;
+    *(pTmp++) =  t0 - t1;
+    *(pTmp++) =  t3 - (t2 * 2);
+  }
+
+  // Vertical
+  for (i=0; i < BLOCK_SIZE; i++)
+  {
+    pTmp = tmp + i;
+    p0 = *pTmp;
+    p1 = *(pTmp += BLOCK_SIZE);
+    p2 = *(pTmp += BLOCK_SIZE);
+    p3 = *(pTmp += BLOCK_SIZE);
+
+    t0 = p0 + p3;
+    t1 = p1 + p2;
+    t2 = p1 - p2;
+    t3 = p0 - p3;
+
+    ii = pos_x + i;
+    tblock[pos_y    ][ii] = t0 + t1;
+    tblock[pos_y + 1][ii] = t2 + (t3 * 2);
+    tblock[pos_y + 2][ii] = t0 - t1;
+    tblock[pos_y + 3][ii] = t3 - (t2 * 2);
+  }
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Inverse 4x4 integer transform.
+ *
+ *    A row pass fills a 16-entry scratch buffer of shorts, then a column
+ *    pass writes the result. Every coefficient is read before any output
+ *    is stored, so tblock and block may refer to the same storage.
+ *
+ * \param tblock  coefficients, read as tblock[0..3][0..3]
+ * \param block   receives the result at block[0..3][0..3]
+ * \param pos_y   not used by this routine
+ * \param pos_x   not used by this routine
+ ***************************************************************************
+ */
+static void inverse4x4(const h264_short_block_t tblock, h264_short_block_t block, int pos_y, int pos_x)
+{
+  short scratch[16];
+  short *row_out = scratch;
+  int k;
+
+  // row pass: one source row becomes four consecutive scratch entries
+  for (k = 0; k < BLOCK_SIZE; k++)
+  {
+    const int c0 = tblock[k][0];
+    const int c1 = tblock[k][1];
+    const int c2 = tblock[k][2];
+    const int c3 = tblock[k][3];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = (c1 >> 1) - c3;
+    const int odd_sum   = c1 + (c3 >> 1);
+
+    row_out[0] = even_sum  + odd_sum;
+    row_out[1] = even_diff + odd_diff;
+    row_out[2] = even_diff - odd_diff;
+    row_out[3] = even_sum  - odd_sum;
+    row_out += 4;
+  }
+
+  // column pass: walk down column k of the scratch buffer
+  for (k = 0; k < BLOCK_SIZE; k++)
+  {
+    const short *col = scratch + k;
+    const int c0 = col[0];
+    const int c1 = col[BLOCK_SIZE];
+    const int c2 = col[2 * BLOCK_SIZE];
+    const int c3 = col[3 * BLOCK_SIZE];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = (c1 >> 1) - c3;
+    const int odd_sum   = c1 + (c3 >> 1);
+
+    block[0][k] = even_sum  + odd_sum;
+    block[1][k] = even_diff + odd_diff;
+    block[2][k] = even_diff - odd_diff;
+    block[3][k] = even_sum  - odd_sum;
+  }
+}
+
+#ifdef _M_IX86
+/* benski> this exists just for conformance testing. not used in production code
+ *
+ * Inverse 4x4 transform written as MSVC inline assembly. Only the 64-bit
+ * registers mm0..mm6 are used, with 16-bit packed add / subtract / shift.
+ *
+ * The byte offset ((pos_y << 4) + pos_x) << 1 is added to both tblock and
+ * block, and rows are accessed 32 bytes apart, i.e. a 4x4 window inside an
+ * array of 16 shorts per row.
+ * Sequence: load four rows, transpose, butterfly, transpose back, butterfly
+ * again, store four rows.
+ *
+ * NOTE(review): the final stores are spelled "XMMWORD PTR" although the
+ * source operand is an mm register - MMWORD PTR looks intended; confirm.
+ * NOTE(review): no emms is issued in this routine - confirm that callers do
+ * not run x87 floating point code afterwards without one.
+ */
+static void inverse4x4_sse2_x86(const h264_short_macroblock_t tblock, h264_short_macroblock_t block, int pos_y, int pos_x)
+{
+ __asm
+ {
+ mov edx, pos_y
+ shl edx, 4 // 16 step stride
+ add edx, pos_x
+ shl edx, 1 // * sizeof(short)
+
+ // eax: pointer to the start of tblock (offset by passed pos_y, pos_x)
+ mov eax, edx
+ add eax, tblock
+
+ // esi: results
+ mov esi, edx
+ add esi, block
+
+ // load 4x4 matrix
+ movq mm0, MMWORD PTR 0[eax]
+ movq mm1, MMWORD PTR 32[eax]
+ movq mm2, MMWORD PTR 64[eax]
+ movq mm3, MMWORD PTR 96[eax]
+
+ // rotate 4x4 matrix
+ movq mm4, mm0 // p0 = mm4 (copy)
+ punpcklwd mm0, mm2 // r0 = mm0
+ punpckhwd mm4, mm2 // r2 = mm4
+ movq mm5, mm1 // p1 = mm5 (copy)
+ punpcklwd mm1, mm3 // r1 = mm1
+ punpckhwd mm5, mm3 // r3 = mm5
+ movq mm6, mm0 // r0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // r2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+
+ /*
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+ */
+
+ // rotate 4x4 matrix to set up for vertical
+ movq mm4, mm0 // r0 = mm4 (copy)
+ punpcklwd mm0, mm2 // p0 = mm0
+ punpckhwd mm4, mm2 // p2 = mm4
+ movq mm5, mm1 // r1 = mm5 (copy)
+ punpcklwd mm1, mm3 // p1 = mm1
+ punpckhwd mm5, mm3 // p3 = mm5
+ movq mm6, mm0 // p0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // p2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+ /*
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+ */
+ movq XMMWORD PTR 0[esi], mm0
+ movq XMMWORD PTR 32[esi], mm1
+ movq XMMWORD PTR 64[esi], mm2
+ movq XMMWORD PTR 96[esi], mm3
+ }
+}
+#endif
+/* Adds the rounded 4x4 residual to the prediction and writes the result to curImg.
+ *
+ * curImg, mpr      : the 4x4 window starts at row joff, column mb_x
+ * tblock           : 4x4 residual, always read from its own origin
+ * max_imgpel_value : clip bound handed to iClip1 in the portable path only
+ *
+ * _M_IX86 path: computes (coef + 32) >> 6 on 16-bit lanes, adds the
+ * zero-extended prediction bytes with paddsw and packs the sums to bytes
+ * with packuswb (unsigned saturation). Rows are 16 bytes apart in
+ * curImg / mpr and 8 bytes apart in tblock. This path never reads
+ * max_imgpel_value.
+ * Portable path: iClip1(max_imgpel_value, rshift_rnd_sf(coef, DQ_BITS) + pred).
+ *
+ * NOTE(review): the two paths appear to agree when DQ_BITS is 6 and
+ * max_imgpel_value is 255 - confirm against the definitions of DQ_BITS,
+ * rshift_rnd_sf and iClip1.
+ * NOTE(review): no emms follows the MMX code - confirm the caller issues one.
+ */
+static void sample_reconstruct(h264_imgpel_macroblock_t curImg, const h264_imgpel_macroblock_t mpr, const h264_short_block_t tblock, int joff, int mb_x, int max_imgpel_value)
+{
+ #ifdef _M_IX86
+ __asm
+ {
+ // mm0 : constant value 32
+ mov edx, 0x00200020
+ movd mm0, edx
+ punpckldq mm0, mm0
+
+ // ecx: y offset
+ mov ecx, joff
+ shl ecx, 4 // imgpel stuff is going to be 16 byte stride
+ add ecx, mb_x
+
+ // eax: curImg
+ mov eax, curImg
+ add eax, ecx
+
+ // edx: mpr
+ mov edx, mpr
+ add edx, ecx
+
+ // ecx: tblock (which is short, not byte)
+ mov ecx, tblock
+
+ // mm7: zero
+ pxor mm7, mm7
+
+ // load coefficients
+ movq mm1, MMWORD PTR 0[ecx]
+ movq mm2, MMWORD PTR 8[ecx]
+ movq mm3, MMWORD PTR 16[ecx]
+ movq mm4, MMWORD PTR 24[ecx]
+ paddw mm1, mm0 // rres + 32
+ paddw mm2, mm0 // rres + 32
+ paddw mm3, mm0 // rres + 32
+ paddw mm0, mm4 // rres + 32
+ psraw mm1, 6 // (rres + 32) >> 6
+ psraw mm2, 6 // (rres + 32) >> 6
+ psraw mm3, 6 // (rres + 32) >> 6
+ psraw mm0, 6 // (rres + 32) >> 6
+ // mm1-mm3: tblock[0] - tblock[2], mm0: tblock[3]
+
+ // convert mpr from unsigned char to short
+ movd mm4, DWORD PTR 0[edx]
+ movd mm5, DWORD PTR 16[edx]
+ movd mm6, DWORD PTR 32[edx]
+ punpcklbw mm4, mm7
+ punpcklbw mm5, mm7
+ punpcklbw mm6, mm7
+ paddsw mm4, mm1 // pred_row + rres_row
+ movd mm1, DWORD PTR 48[edx] // reuse mm1 for mpr[3]
+ paddsw mm5, mm2 // pred_row + rres_row
+ punpcklbw mm1, mm7
+ paddsw mm6, mm3 // pred_row + rres_row
+ paddsw mm1, mm0 // pred_row + rres_row
+ // results in mm4, mm5, mm6, mm1
+
+ // move back to 8 bit
+ packuswb mm4, mm7
+ packuswb mm5, mm7
+ packuswb mm6, mm7
+ packuswb mm1, mm7
+ movd DWORD PTR 0[eax], mm4
+ movd DWORD PTR 16[eax], mm5
+ movd DWORD PTR 32[eax], mm6
+ movd DWORD PTR 48[eax], mm1
+ }
+#else
+ int i, j;
+
+ for (j = 0; j < BLOCK_SIZE; j++)
+ {
+ for (i=0;i<BLOCK_SIZE;i++)
+ curImg[j+joff][mb_x+i] = (imgpel) iClip1( max_imgpel_value, rshift_rnd_sf(tblock[j][i], DQ_BITS) + mpr[j+joff][mb_x+i]);
+ }
+#endif
+}
+/* 32-bit x86 debug builds only (_M_IX86 && _DEBUG): fused inverse 4x4
+ * transform and reconstruction in MSVC inline assembly on mm0..mm7.
+ *
+ * The offset pos_y * 16 + pos_x is applied in bytes to mb_pred and mb_rec
+ * and doubled for tblock, i.e. all three are addressed as arrays with 16
+ * elements per row (shorts for tblock).
+ * Sequence: load four coefficient rows, transpose, butterfly, transpose
+ * back, butterfly, (x + 32) >> 6, add the zero-extended prediction bytes
+ * with paddsw, pack to bytes with unsigned saturation, store four 4-byte
+ * rows into mb_rec.
+ *
+ * NOTE(review): no emms is issued in this routine - confirm the caller does.
+ * NOTE(review): block.h declares itrans4x4_mmx, not this name - confirm how
+ * this function is reached.
+ */
+#if defined(_M_IX86) && defined(_DEBUG)
+void itrans4x4_sse2(const h264_short_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
+{
+ __declspec(align(32)) static const short const32[4] = {32, 32, 32, 32};
+ __asm
+ {
+ mov edx, pos_y
+ shl edx, 4 // imgpel stuff is going to be 16 byte stride
+ add edx, pos_x
+
+ // eax: tblock
+ lea eax, [edx*2]
+ add eax, tblock
+
+ // ecx: mpr
+ mov ecx, mb_pred
+ add ecx, edx
+
+ // edx: results
+ add edx, mb_rec
+
+ // load 4x4 matrix
+ movq mm0, MMWORD PTR 0[eax]
+ movq mm1, MMWORD PTR 32[eax]
+ movq mm2, MMWORD PTR 64[eax]
+ movq mm3, MMWORD PTR 96[eax]
+
+ // rotate 4x4 matrix
+ movq mm4, mm0 // p0 = mm4 (copy)
+ punpcklwd mm0, mm2 // r0 = mm0
+ punpckhwd mm4, mm2 // r2 = mm4
+ movq mm5, mm1 // p1 = mm5 (copy)
+ punpcklwd mm1, mm3 // r1 = mm1
+ punpckhwd mm5, mm3 // r3 = mm5
+ movq mm6, mm0 // r0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // r2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+
+ /*
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+ */
+
+ // rotate 4x4 matrix to set up for vertical
+ movq mm4, mm0 // r0 = mm4 (copy)
+ punpcklwd mm0, mm2 // p0 = mm0
+ punpckhwd mm4, mm2 // p2 = mm4
+ movq mm5, mm1 // r1 = mm5 (copy)
+ punpcklwd mm1, mm3 // p1 = mm1
+ punpckhwd mm5, mm3 // p3 = mm5
+ movq mm6, mm0 // p0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // p2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+ /*
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+ */
+/* --- 4x4 iDCT done, now time to combine with mpr --- */
+ // mm0 : constant value 32
+ movq mm7, const32
+
+ paddw mm0, mm7 // rres + 32
+ psraw mm0, 6 // (rres + 32) >> 6
+ paddw mm1, mm7 // rres + 32
+ psraw mm1, 6 // (rres + 32) >> 6
+ paddw mm2, mm7 // rres + 32
+ psraw mm2, 6 // (rres + 32) >> 6
+ paddw mm3, mm7 // rres + 32
+ psraw mm3, 6 // (rres + 32) >> 6
+
+ pxor mm7, mm7
+
+ // convert mpr from unsigned char to short
+ movd mm4, DWORD PTR 0[ecx]
+ movd mm5, DWORD PTR 16[ecx]
+ movd mm6, DWORD PTR 32[ecx]
+ punpcklbw mm4, mm7
+ punpcklbw mm5, mm7
+ punpcklbw mm6, mm7
+ paddsw mm4, mm0 // pred_row + rres_row
+ movd mm0, DWORD PTR 48[ecx] // reuse mm0 for mpr[3]
+ paddsw mm5, mm1 // pred_row + rres_row
+ punpcklbw mm0, mm7
+ paddsw mm6, mm2 // pred_row + rres_row
+ paddsw mm0, mm3 // pred_row + rres_row
+ // results in mm4, mm5, mm6, mm0
+
+ // move back to 8 bit
+ packuswb mm4, mm7
+ packuswb mm5, mm7
+ packuswb mm6, mm7
+ packuswb mm0, mm7
+ movd DWORD PTR 0[edx], mm4
+ movd DWORD PTR 16[edx], mm5
+ movd DWORD PTR 32[edx], mm6
+ movd DWORD PTR 48[edx], mm0
+ }
+}
+#elif defined(_M_X64)
+static void itrans4x4_sse2(const h264_int_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
+{
+ __declspec(align(32)) static const int const32[4] = {32, 32, 32, 32};
+ __m128i p0,p1,p2,p3;
+ __m128i t0,t1,t2,t3;
+ __m128i r0,r1,r2,r3;
+ __m128i c32, zero;
+
+ /* x64 variant: fused inverse 4x4 transform and reconstruction using
+ * intrinsics on 32-bit coefficients (four per __m128i).
+ * Sequence: transpose, row butterfly, transpose, column butterfly,
+ * (x + 32) >> 6, pack to 16 bit, add the zero-extended mb_pred bytes,
+ * pack to unsigned bytes with saturation, store four 4-byte rows at
+ * mb_rec[pos_y..pos_y+3][pos_x].
+ *
+ * horizontal pass follows */
+ // load registers in vertical mode, we'll rotate them next
+ p0 = _mm_loadu_si128((__m128i *)&tblock[pos_y][pos_x]); // 00 01 02 03
+ p1 = _mm_loadu_si128((__m128i *)&tblock[pos_y+1][pos_x]); // 10 11 12 13
+ p2 = _mm_loadu_si128((__m128i *)&tblock[pos_y+2][pos_x]); // 20 21 22 23
+ p3 = _mm_loadu_si128((__m128i *)&tblock[pos_y+3][pos_x]); // 30 31 32 33
+
+ // rotate 4x4 matrix
+ r0 = _mm_unpacklo_epi32(p0, p2); // 00 20 01 21
+ r1 = _mm_unpacklo_epi32(p1, p3); // 10 30 11 31
+ r2 = _mm_unpackhi_epi32(p0, p2); // 02 22 03 23
+ r3 = _mm_unpackhi_epi32(p1, p3); // 12 32 13 33
+ t0 = _mm_unpacklo_epi32(r0, r1); // 00 10 20 30
+ t1 = _mm_unpackhi_epi32(r0, r1); // 01 11 21 31
+ t2 = _mm_unpacklo_epi32(r2, r3); // 02 12 22 32
+ t3 = _mm_unpackhi_epi32(r2, r3); // 03 13 23 33
+
+ p0 = _mm_add_epi32(t0, t2); //t0 + t2;
+ p1 = _mm_sub_epi32(t0, t2); // t0 - t2;
+ p2 = _mm_srai_epi32(t1, 1); // t1 >> 1
+ p2 = _mm_sub_epi32(p2, t3); // (t1 >> 1) - t3;
+ p3 = _mm_srai_epi32(t3, 1); // (t3 >> 1)
+ p3 = _mm_add_epi32(p3, t1); // t1 + (t3 >> 1);
+
+ t0 = _mm_add_epi32(p0, p3); //p0 + p3;
+ t1 = _mm_add_epi32(p1, p2);//p1 + p2;
+ t2 = _mm_sub_epi32(p1, p2); //p1 - p2;
+ t3 = _mm_sub_epi32(p0, p3); //p0 - p3;
+
+ // rotate 4x4 matrix to set up for vertical
+ r0 = _mm_unpacklo_epi32(t0, t2);
+ r1 = _mm_unpacklo_epi32(t1, t3);
+ r2 = _mm_unpackhi_epi32(t0, t2);
+ r3 = _mm_unpackhi_epi32(t1, t3);
+ t0 = _mm_unpacklo_epi32(r0, r1);
+ t1 = _mm_unpackhi_epi32(r0, r1);
+ t2 = _mm_unpacklo_epi32(r2, r3);
+ t3 = _mm_unpackhi_epi32(r2, r3);
+
+ // vertical
+ p0 = _mm_add_epi32(t0, t2); //t0 + t2;
+ p3 = _mm_srai_epi32(t3, 1); // (t3 >> 1)
+ p3 = _mm_add_epi32(p3, t1); // t1 + (t3 >> 1);
+ r0 = _mm_add_epi32(p0, p3); //p0 + p3;
+ r3 = _mm_sub_epi32(p0, p3); //p0 - p3;
+ p1 = _mm_sub_epi32(t0, t2); // t0 - t2;
+ p2 = _mm_srai_epi32(t1, 1); // t1 >> 1
+ p2 = _mm_sub_epi32(p2, t3); // (t1 >> 1) - t3;
+ r1 = _mm_add_epi32(p1, p2);//p1 + p2;
+ r2 = _mm_sub_epi32(p1, p2); //p1 - p2;
+
+ c32 = _mm_load_si128((const __m128i *)const32);
+ zero = _mm_setzero_si128();
+
+ // (x + 32) >> 6
+ r0 = _mm_add_epi32(r0, c32);
+ r0 = _mm_srai_epi32(r0, 6);
+ r1 = _mm_add_epi32(r1, c32);
+ r1 = _mm_srai_epi32(r1, 6);
+ r2 = _mm_add_epi32(r2, c32);
+ r2 = _mm_srai_epi32(r2, 6);
+ r3 = _mm_add_epi32(r3, c32);
+ r3 = _mm_srai_epi32(r3, 6);
+
+ // convert to 16bit values
+ r0 = _mm_packs_epi32(r0, r1); // r0 now holds rows 0 and 1
+ r2 = _mm_packs_epi32(r2, r3); // r2 now holds rows 2 and 3
+
+ // convert mpr from unsigned char to short
+ p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y][pos_x]);
+ p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+1][pos_x]);
+ p0 = _mm_unpacklo_epi32(p0, p1);
+ p0 = _mm_unpacklo_epi8(p0, zero); // convert to short
+ r0 = _mm_add_epi16(r0, p0); // plain 16-bit add here; the 32-bit x86 variant uses paddsw
+
+ p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+2][pos_x]);
+ p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+3][pos_x]);
+ p0 = _mm_unpacklo_epi32(p0, p1);
+ p0 = _mm_unpacklo_epi8(p0, zero); // convert to short
+ r2 = _mm_add_epi16(r2, p0);
+
+ r0 = _mm_packus_epi16(r0, r2); // convert to unsigned char
+ *(int32_t *)&mb_rec[pos_y][pos_x] = _mm_cvtsi128_si32(r0);
+ r0 = _mm_srli_si128(r0, 4); // shift the next 4-byte row into the low lane
+ *(int32_t *)&mb_rec[pos_y+1][pos_x] = _mm_cvtsi128_si32(r0);
+ r0 = _mm_srli_si128(r0, 4);
+ *(int32_t *)&mb_rec[pos_y+2][pos_x] = _mm_cvtsi128_si32(r0);
+ r0 = _mm_srli_si128(r0, 4);
+ *(int32_t *)&mb_rec[pos_y+3][pos_x] = _mm_cvtsi128_si32(r0);
+}
+#endif
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Portable inverse 4x4 transform followed by reconstruction.
+ *
+ *    The inverse transform runs in place: its output is written back over
+ *    tblock (the const qualifier is cast away for that purpose), and
+ *    sample_reconstruct then reads the transformed values from the same
+ *    storage.
+ *
+ * \param tblock   4x4 coefficients; overwritten with the transformed residual
+ * \param mb_pred  prediction samples
+ * \param mb_rec   receives the reconstructed samples
+ * \param pos_x    column of the 4x4 window inside mb_pred / mb_rec
+ * \param pos_y    row of the 4x4 window inside mb_pred / mb_rec
+ *
+ * \note
+ *    The maximum sample value handed to sample_reconstruct is fixed at 255.
+ ***************************************************************************
+ */
+void itrans4x4_c(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
+{
+  // writable view of the coefficient block, used as the transform's output
+  h264_short_block_row_t *residual = (h264_short_block_row_t *)tblock;
+
+  inverse4x4(tblock, residual, pos_y, pos_x);
+  sample_reconstruct(mb_rec, mb_pred, tblock, pos_y, pos_x, 255);
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    In-place inverse 4x4 Hadamard transform.
+ *
+ *    A row pass fills a 16-entry scratch buffer; a column pass then writes
+ *    the result back into block.
+ *
+ * \param block  4x4 values, overwritten with the transformed values
+ ***************************************************************************
+ */
+void ihadamard4x4(int block[4][4])
+{
+  int scratch[16];
+  int *row_out = scratch;
+  int k;
+
+  // row pass: one input row becomes four consecutive scratch entries
+  for (k = 0; k < BLOCK_SIZE; k++)
+  {
+    const int c0 = block[k][0];
+    const int c1 = block[k][1];
+    const int c2 = block[k][2];
+    const int c3 = block[k][3];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = c1 - c3;
+    const int odd_sum   = c1 + c3;
+
+    row_out[0] = even_sum  + odd_sum;
+    row_out[1] = even_diff + odd_diff;
+    row_out[2] = even_diff - odd_diff;
+    row_out[3] = even_sum  - odd_sum;
+    row_out += 4;
+  }
+
+  // column pass: walk down column k of the scratch buffer
+  for (k = 0; k < BLOCK_SIZE; k++)
+  {
+    const int *col = scratch + k;
+    const int c0 = col[0];
+    const int c1 = col[BLOCK_SIZE];
+    const int c2 = col[2 * BLOCK_SIZE];
+    const int c3 = col[3 * BLOCK_SIZE];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = c1 - c3;
+    const int odd_sum   = c1 + c3;
+
+    block[0][k] = even_sum  + odd_sum;
+    block[1][k] = even_diff + odd_diff;
+    block[2][k] = even_diff - odd_diff;
+    block[3][k] = even_sum  - odd_sum;
+  }
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Inverse Hadamard transform of a 2-row by 4-column coefficient array.
+ *
+ *    The two input rows are first combined into their sum and their
+ *    difference; each combined row then goes through a 4-point butterfly
+ *    whose outputs are stored transposed, i.e. as one column of block.
+ *    All input is read before the first output is written.
+ *
+ * \param tblock  input, read as tblock[0..1][0..3]
+ * \param block   output, written as block[0..3][0..1]
+ ***************************************************************************
+ */
+void ihadamard4x2(int **tblock, int **block)
+{
+  int mix[2][4];
+  int c, r;
+
+  // combine the two input rows: mix[0] = sum, mix[1] = difference
+  for (c = 0; c < 4; c++)
+  {
+    mix[0][c] = tblock[0][c] + tblock[1][c];
+    mix[1][c] = tblock[0][c] - tblock[1][c];
+  }
+
+  // 4-point butterfly per combined row, stored as column r of the output
+  for (r = 0; r < 2; r++)
+  {
+    const int *v = mix[r];
+    const int even_sum  = v[0] + v[2];
+    const int even_diff = v[0] - v[2];
+    const int odd_diff  = v[1] - v[3];
+    const int odd_sum   = v[1] + v[3];
+
+    block[0][r] = even_sum  + odd_sum;
+    block[1][r] = even_diff + odd_diff;
+    block[2][r] = even_diff - odd_diff;
+    block[3][r] = even_sum  - odd_sum;
+  }
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Inverse 2x2 Hadamard transform on four values.
+ *
+ *    Costs 8 additions/subtractions and 8 assignments. All four inputs are
+ *    read before the first output is written, so tblock and block may be
+ *    the same array.
+ *
+ * \param tblock  four input values
+ * \param block   four output values
+ ***************************************************************************
+ */
+void ihadamard2x2(int tblock[4], int block[4])
+{
+  const int sum01  = tblock[0] + tblock[1];
+  const int diff01 = tblock[0] - tblock[1];
+  const int sum23  = tblock[2] + tblock[3];
+  const int diff23 = tblock[2] - tblock[3];
+
+  block[0] = sum01  + sum23;
+  block[1] = diff01 + diff23;
+  block[2] = sum01  - sum23;
+  block[3] = diff01 - diff23;
+}
+
diff --git a/Src/h264dec/lcommon/src/win32.c b/Src/h264dec/lcommon/src/win32.c
new file mode 100644
index 00000000..7d921e1e
--- /dev/null
+++ b/Src/h264dec/lcommon/src/win32.c
@@ -0,0 +1,67 @@
+
+/*!
+ *************************************************************************************
+ * \file win32.c
+ *
+ * \brief
+ * Platform dependent code
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Suehring <suehring@hhi.de>
+ *************************************************************************************
+ */
+
+#include "global.h"
+
+
+#ifdef _WIN32
+
+static LARGE_INTEGER freq;
+/* Win32: stores the current QueryPerformanceCounter reading in *time.
+ * The return value of the API call is ignored. */
+void gettime(TIME_T* time)
+{
+ QueryPerformanceCounter(time);
+}
+/* Win32: returns end - start in raw performance-counter ticks.
+ * The result is not a time unit yet; timenorm() scales it by the counter frequency. */
+int64 timediff(TIME_T* start, TIME_T* end)
+{
+ return (int64)((end->QuadPart - start->QuadPart));
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Win32: scales a tick count by 1000 / counter frequency.
+ *
+ *    The frequency is queried once, on the first call, and kept in the
+ *    file-scope variable freq.
+ *
+ * \param cur_time  tick count, e.g. the value returned by timediff()
+ * \return cur_time * 1000 / freq.QuadPart
+ ***************************************************************************
+ */
+int64 timenorm(int64 cur_time)
+{
+  static int freq_known = 0;
+
+  if (!freq_known)
+  {
+    QueryPerformanceFrequency(&freq);
+    freq_known = 1;
+  }
+
+  return (int64)((cur_time * 1000) / freq.QuadPart);
+}
+
+#else
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Non-Win32: stores the current time of day in *time.
+ *
+ * \param time  receives the value filled in by gettimeofday()
+ *
+ * \note
+ *    The timezone argument of gettimeofday() is obsolete and POSIX leaves
+ *    the behavior unspecified when it is non-null, so NULL is passed and
+ *    no timezone object is kept.
+ ***************************************************************************
+ */
+void gettime(TIME_T* time)
+{
+  gettimeofday(time, NULL);
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Non-Win32: returns end - start in microseconds.
+ *
+ *    Both field differences are computed directly in int64, so neither the
+ *    seconds nor the microseconds difference is narrowed to int on the way.
+ *
+ * \param start  earlier time stamp
+ * \param end    later time stamp
+ * \return (end - start) in microseconds
+ ***************************************************************************
+ */
+int64 timediff(TIME_T* start, TIME_T* end)
+{
+  int64 sec_diff  = (int64) end->tv_sec  - (int64) start->tv_sec;
+  int64 usec_diff = (int64) end->tv_usec - (int64) start->tv_usec;
+
+  return usec_diff + sec_diff * (int64) 1000000;
+}
+/* Non-Win32: divides cur_time by 1000. With the microsecond value returned
+ * by timediff() above, the result is in milliseconds. */
+int64 timenorm(int64 cur_time)
+{
+ return (int64)(cur_time / (int64) 1000);
+}
+#endif
diff --git a/Src/h264dec/ldecod/inc/biaridecod.h b/Src/h264dec/ldecod/inc/biaridecod.h
new file mode 100644
index 00000000..9364632b
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/biaridecod.h
@@ -0,0 +1,157 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ * biaridecod.h
+ *
+ * \brief
+ * Headerfile for binary arithmetic decoder routines
+ *
+ * \author
+ * Detlev Marpe,
+ * Gabi Blättermann
+ * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+ *
+ * \date
+ * 21. Oct 2000
+ **************************************************************************
+ */
+
+#ifndef _BIARIDECOD_H_
+#define _BIARIDECOD_H_
+
+
+/************************************************************************
+ * D e f i n i t i o n s
+ ***********************************************************************
+ */
+
+/* Range table for LPS */
+/*
+static const byte rLPS_table_64x4[64][4]=
+{
+ { 128, 176, 208, 240},
+ { 128, 167, 197, 227},
+ { 128, 158, 187, 216},
+ { 123, 150, 178, 205},
+ { 116, 142, 169, 195},
+ { 111, 135, 160, 185},
+ { 105, 128, 152, 175},
+ { 100, 122, 144, 166},
+ { 95, 116, 137, 158},
+ { 90, 110, 130, 150},
+ { 85, 104, 123, 142},
+ { 81, 99, 117, 135},
+ { 77, 94, 111, 128},
+ { 73, 89, 105, 122},
+ { 69, 85, 100, 116},
+ { 66, 80, 95, 110},
+ { 62, 76, 90, 104},
+ { 59, 72, 86, 99},
+ { 56, 69, 81, 94},
+ { 53, 65, 77, 89},
+ { 51, 62, 73, 85},
+ { 48, 59, 69, 80},
+ { 46, 56, 66, 76},
+ { 43, 53, 63, 72},
+ { 41, 50, 59, 69},
+ { 39, 48, 56, 65},
+ { 37, 45, 54, 62},
+ { 35, 43, 51, 59},
+ { 33, 41, 48, 56},
+ { 32, 39, 46, 53},
+ { 30, 37, 43, 50},
+ { 29, 35, 41, 48},
+ { 27, 33, 39, 45},
+ { 26, 31, 37, 43},
+ { 24, 30, 35, 41},
+ { 23, 28, 33, 39},
+ { 22, 27, 32, 37},
+ { 21, 26, 30, 35},
+ { 20, 24, 29, 33},
+ { 19, 23, 27, 31},
+ { 18, 22, 26, 30},
+ { 17, 21, 25, 28},
+ { 16, 20, 23, 27},
+ { 15, 19, 22, 25},
+ { 14, 18, 21, 24},
+ { 14, 17, 20, 23},
+ { 13, 16, 19, 22},
+ { 12, 15, 18, 21},
+ { 12, 14, 17, 20},
+ { 11, 14, 16, 19},
+ { 11, 13, 15, 18},
+ { 10, 12, 15, 17},
+ { 10, 12, 14, 16},
+ { 9, 11, 13, 15},
+ { 9, 11, 12, 14},
+ { 8, 10, 12, 14},
+ { 8, 9, 11, 13},
+ { 7, 9, 11, 12},
+ { 7, 9, 10, 12},
+ { 7, 8, 10, 11},
+ { 6, 8, 9, 11},
+ { 6, 7, 9, 10},
+ { 6, 7, 8, 9},
+ { 2, 2, 2, 2}
+};*/
+static const byte rLPS_table_64x4[4][64]={ /* LPS range table, stored transposed:
+ * row q of this array holds column q of the commented-out [64][4] layout
+ * above, so it is dimensioned [4][64] despite the "64x4" in its name. */
+{128, 128, 128, 123, 116, 111, 105, 100, 95, 90, 85, 81, 77, 73, 69, 66,
+ 62, 59, 56, 53, 51, 48, 46, 43, 41, 39, 37, 35, 33, 32, 30, 29,
+ 27, 26, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 12,
+ 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 7, 6, 6, 6, 2,
+ },
+{176, 167, 158, 150, 142, 135, 128, 122, 116, 110, 104, 99, 94, 89, 85, 80,
+ 76, 72, 69, 65, 62, 59, 56, 53, 50, 48, 45, 43, 41, 39, 37, 35,
+ 33, 31, 30, 28, 27, 26, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15,
+ 14, 14, 13, 12, 12, 11, 11, 10, 9, 9, 9, 8, 8, 7, 7, 2,
+ },
+{208, 197, 187, 178, 169, 160, 152, 144, 137, 130, 123, 117, 111, 105, 100, 95,
+ 90, 86, 81, 77, 73, 69, 66, 63, 59, 56, 54, 51, 48, 46, 43, 41,
+ 39, 37, 35, 33, 32, 30, 29, 27, 26, 25, 23, 22, 21, 20, 19, 18,
+ 17, 16, 15, 15, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 2,
+ },
+{240, 227, 216, 205, 195, 185, 175, 166, 158, 150, 142, 135, 128, 122, 116, 110,
+ 104, 99, 94, 89, 85, 80, 76, 72, 69, 65, 62, 59, 56, 53, 50, 48,
+ 45, 43, 41, 39, 37, 35, 33, 31, 30, 28, 27, 25, 24, 23, 22, 21,
+ 20, 19, 18, 17, 16, 15, 14, 14, 13, 12, 12, 11, 11, 10, 9, 2,
+}};
+
+/* 64-entry state transition table: entry s is s + 1 for s < 62, and entries
+ * 62 and 63 map to themselves. Going by the name, this is the next state
+ * after a most-probable symbol.
+ * Original note: "make uint16 to match biari_decode_symbol".
+ * NOTE(review): the element type here is byte, not uint16 - confirm whether
+ * that widening is still wanted. */
+static const byte AC_next_state_MPS_64[64] =
+{
+ 1,2,3,4,5,6,7,8,9,10,
+ 11,12,13,14,15,16,17,18,19,20,
+ 21,22,23,24,25,26,27,28,29,30,
+ 31,32,33,34,35,36,37,38,39,40,
+ 41,42,43,44,45,46,47,48,49,50,
+ 51,52,53,54,55,56,57,58,59,60,
+ 61,62,62,63
+};
+
+/* 64-entry state transition table: entries 0 and 1 map to 0 and entry 63
+ * maps to itself. Going by the name, this is the next state after a
+ * least-probable symbol.
+ * Original note: "make uint16 to match biari_decode_symbol".
+ * NOTE(review): the element type here is byte, not uint16 - confirm whether
+ * that widening is still wanted. */
+static const byte AC_next_state_LPS_64[64] =
+{
+ 0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
+ 8, 9, 9,11,11,12,13,13,15,15,
+ 16,16,18,18,19,19,21,21,22,22,
+ 23,24,24,25,26,26,27,27,28,29,
+ 29,30,30,30,31,32,32,33,33,33,
+ 34,34,35,35,35,36,36,36,37,37,
+ 37,38,38,63
+};
+/* 32-entry lookup whose values fall from 6 to 1 as the index grows (6, 5, 4 x2, 3 x4, 2 x8, 1 x16). NOTE(review): presumably a renormalization shift count - confirm against the decoder that indexes it. */
+static const byte renorm_table_32[32]={6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+static const byte renorm_table_256[256]={6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, };
+/* Entry points of the binary arithmetic decoder; the definitions are not part of this header. */
+extern void arideco_start_decoding(DecodingEnvironmentPtr eep, unsigned char *code_buffer, int firstbyte, int *code_len);
+//extern int arideco_bits_read(const DecodingEnvironmentPtr dep);
+extern void arideco_done_decoding(DecodingEnvironmentPtr dep);
+extern void biari_init_context (int qp, BiContextTypePtr ctx, const char* ini);
+extern unsigned int biari_decode_symbol(DecodingEnvironmentPtr dep, BiContextTypePtr bi_ct );
+extern unsigned int biari_decode_symbol_eq_prob(DecodingEnvironmentPtr dep);
+extern unsigned int biari_decode_final(DecodingEnvironmentPtr dep);
+extern unsigned int getbyte(DecodingEnvironmentPtr dep);
+extern unsigned int getword(DecodingEnvironmentPtr dep);
+#endif // BIARIDECOD_H_
+
diff --git a/Src/h264dec/ldecod/inc/block.h b/Src/h264dec/ldecod/inc/block.h
new file mode 100644
index 00000000..d819b13c
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/block.h
@@ -0,0 +1,133 @@
+
+/*!
+ ************************************************************************
+ * \file block.h
+ *
+ * \brief
+ * definitions for block decoding functions
+ *
+ * \author
+ * Inge Lille-Langoy <inge.lille-langoy@telenor.com> \n
+ * Telenor Satellite Services \n
+ * P.O.Box 6914 St.Olavs plass \n
+ * N-0130 Oslo, Norway
+ *
+ ************************************************************************
+ */
+
+#ifndef _BLOCK_H_
+#define _BLOCK_H_
+
+#include "global.h"
+#include "transform8x8.h"
+/* 52-entry lookup: identity for indices 0..29, then a slower-growing mapping
+ * that ends at 39 for indices 48..51.
+ * NOTE(review): presumably maps a QP index to the chroma QP - confirm against the users of this table. */
+static const byte QP_SCALE_CR[52]=
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,
+ 12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
+ 28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,
+ 37,38,38,38,39,39,39,39
+
+};
+
+/*! look up tables for FRExt_chroma support
+ *
+ * subblk_offset_x: 3 x 8 x 4 table of horizontal offsets (0, 4, 8 or 12).
+ * NOTE(review): the first index presumably selects the chroma format, the
+ * second a block within it and the third one of its four sub-blocks -
+ * confirm against the code that indexes this table.
+ */
+static const unsigned char subblk_offset_x[3][8][4] =
+{
+ {
+ {0, 4, 0, 4},
+ {0, 4, 0, 4},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ },
+ {
+ {0, 4, 0, 4},
+ {0, 4, 0, 4},
+ {0, 4, 0, 4},
+ {0, 4, 0, 4},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ },
+ {
+ {0, 4, 0, 4},
+ {8,12, 8,12},
+ {0, 4, 0, 4},
+ {8,12, 8,12},
+ {0, 4, 0, 4},
+ {8,12, 8,12},
+ {0, 4, 0, 4},
+ {8,12, 8,12}
+ }
+};
+
+/* subblk_offset_y: 3 x 8 x 4 table of vertical offsets (0, 4, 8 or 12), laid
+ * out with the same dimensions as subblk_offset_x.
+ * NOTE(review): index meaning is presumably the same as for subblk_offset_x - confirm. */
+static const unsigned char subblk_offset_y[3][8][4] =
+{
+ {
+ {0, 0, 4, 4},
+ {0, 0, 4, 4},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0}
+ },
+ {
+ {0, 0, 4, 4},
+ {8, 8,12,12},
+ {0, 0, 4, 4},
+ {8, 8,12,12},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0}
+ },
+ {
+ {0, 0, 4, 4},
+ {0, 0, 4, 4},
+ {8, 8,12,12},
+ {8, 8,12,12},
+ {0, 0, 4, 4},
+ {0, 0, 4, 4},
+ {8, 8,12,12},
+ {8, 8,12,12}
+ }
+};
+/* decode_block_scan is a permutation of 0..15; cof4_pos_to_subblock holds the same sequence arranged as 4 rows of 4. */
+static const byte decode_block_scan[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+static const int cof4_pos_to_subblock[4][4] = { { 0, 1, 4, 5}, {2,3,6,7}, {8,9,12,13}, {10,11,14,15} };
+/* Block-level decoding entry points. Only itrans4x4_c is defined in lcommon/src/transform.c; the other definitions are outside this header. */
+extern void iMBtrans4x4(Macroblock *currMB, ColorPlane pl, int smb);
+extern void iMBtrans8x8(Macroblock *currMB, ColorPlane pl);
+
+extern void itrans_sp_cr(Macroblock *currMB, int uv);
+
+extern void intrapred_chroma (Macroblock *currMB, int uv);
+
+extern void Inv_Residual_trans_4x4(Macroblock *currMB, ColorPlane pl, int ioff, int joff);
+extern void Inv_Residual_trans_8x8(Macroblock *currMB, ColorPlane pl, int ioff,int joff);
+
+extern void itrans4x4_c(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y); // note the argument order: pos_x before pos_y
+extern void itrans4x4_mmx(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y); // NOTE(review): transform.c defines no function of this name - confirm where it lives
+extern int intrapred (Macroblock *currMB, ColorPlane pl, int ioff,int joff,int i4,int j4);
+extern void itrans_2 (Macroblock *currMB, ColorPlane pl);
+extern void iTransform (Macroblock *currMB, ColorPlane pl, int smb);
+
+extern void copy_image_data (imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x, int width, int height);
+extern void copy_image_data_16x16 (imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x);
+
+extern void copy_image_data_16x16_stride_c(struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source);
+extern void copy_image_data_16x16_stride_sse(struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source);
+extern void copy_image_data_8x8_stride (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source);
+extern void copy_image_data_8x8_stride2 (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int src_x, int src_y);
+extern void copy_image_data_4x4_stride (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int src_x, int src_y);
+extern void copy_image_data_stride (struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int width, int height);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/cabac.h b/Src/h264dec/ldecod/inc/cabac.h
new file mode 100644
index 00000000..5458d2be
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/cabac.h
@@ -0,0 +1,64 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ * cabac.h
+ *
+ * \brief
+ * Header file for entropy coding routines
+ *
+ * \author
+ * Detlev Marpe \n
+ * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+ *
+ * \date
+ * 21. Oct 2000 (Changes by Tobias Oelbaum 28.08.2001)
+ ***************************************************************************
+ */
+
+#ifndef _CABAC_H_
+#define _CABAC_H_
+
+#include "global.h"
+/* (level, run) pair; readRunLevel_CABAC returns one of these by value. */
+typedef struct Run_Level
+{
+ int level;
+ int run;
+} RunLevel;
+extern MotionInfoContexts* create_contexts_MotionInfo(void);
+extern TextureInfoContexts* create_contexts_TextureInfo(void);
+extern void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx);
+extern void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx);
+
+extern void cabac_new_slice(Slice *currSlice);
+
+extern int readMB_typeInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp);
+extern int readB8_typeInfo_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp);
+extern int readIntraPredMode_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp);
+extern char readRefFrame_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int x, int y);
+extern char readRefFrame_CABAC0(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int y);
+extern int readMVD_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int k, int list, int x, int y);
+extern int readCBP_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp);
+
+// readRunLevel_CABAC returns level and run together in a RunLevel value; there is no separate run out-parameter
+extern RunLevel readRunLevel_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int context);
+extern short readDquant_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp);
+extern char readCIPredMode_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp);
+extern int readMB_skip_flagInfo_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp);
+extern Boolean readFieldModeInfo_CABAC (Macroblock *currMB, DecodingEnvironmentPtr dep_dp);
+extern Boolean readMB_transform_size_flag_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp);
+
+extern void readIPCM_CABAC(Slice *currSlice, struct datapartition *dP);
+
+extern int cabac_startcode_follows(Slice *currSlice, int eos_bit);
+
+
+extern int check_next_mb_and_get_field_mode_CABAC(Slice *currSlice, DataPartition *act_dp);
+
+extern void CheckAvailabilityOfNeighborsCABAC(Macroblock *currMB);
+
+extern void set_read_and_store_CBP(Macroblock **currMB, int chroma_format_idc);
+
+#endif // _CABAC_H_
+
diff --git a/Src/h264dec/ldecod/inc/context_ini.h b/Src/h264dec/ldecod/inc/context_ini.h
new file mode 100644
index 00000000..73977be6
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/context_ini.h
@@ -0,0 +1,23 @@
+
+/*!
+ *************************************************************************************
+ * \file context_ini.h
+ *
+ * \brief
+ * CABAC context initializations
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Detlev Marpe <marpe@hhi.de>
+ * - Heiko Schwarz <hschwarz@hhi.de>
+ **************************************************************************************
+ */
+
+
+#ifndef _CONTEXT_INI_
+#define _CONTEXT_INI_
+
+extern void init_contexts (Slice *currslice);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/contributors.h b/Src/h264dec/ldecod/inc/contributors.h
new file mode 100644
index 00000000..3e462d0b
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/contributors.h
@@ -0,0 +1,223 @@
+
+/*! \file
+ * contributors.h
+ * \brief
+ * List of contributors and copyright information.
+ *
+ * \par Copyright statements
+ \verbatim
+ H.264 JM coder/decoder
+
+ Copyright (C) 2000 by
+ Telenor Satellite Services, Norway
+ Ericsson Radio Systems, Sweden
+ TELES AG, Germany
+ Nokia Inc., USA
+ Nokia Corporation, Finland
+ Siemens AG, Germany
+ Fraunhofer-Institute for Telecommunications Heinrich-Hertz-Institut (HHI), Germany
+ University of Hannover, Institut of Communication Theory and Signal Processing, Germany
+ TICSP, Tampere University of Technology, Finland
+ Munich University of Technology, Institute for Communications Engineering, Germany
+ Videolocus, Canada
+ Motorola Inc., USA
+ Microsoft Corp., USA
+ Apple Computer, Inc.
+ RealNetworks, Inc., USA
+ Thomson, Inc., USA
+ Sejong Univ., Digital Media System Lab., Korea
+ \endverbatim
+ \par Full Contact Information
+ \verbatim
+
+ Lowell Winger <lwinger@videolocus.com><lwinger@uwaterloo.ca>
+ Guy Côté <gcote@videolocus.com>
+ Michael Gallant <mgallant@videolocus.com>
+ VideoLocus Inc.
+ 97 Randall Dr.
+ Waterloo, ON, Canada N2V1C5
+
+ Inge Lille-Langøy <inge.lille-langoy@telenor.com>
+ Telenor Satellite Services
+ P.O.Box 6914 St.Olavs plass
+ N-0130 Oslo, Norway
+
+ Rickard Sjoberg <rickard.sjoberg@era.ericsson.se>
+ Ericsson Radio Systems
+ KI/ERA/T/VV
+ 164 80 Stockholm, Sweden
+
+ Stephan Wenger <stewe@cs.tu-berlin.de>
+ TU Berlin / TELES AG
+ Sekr. FR 6-3
+ Franklinstr. 28-29
+ D-10587 Berlin, Germany
+
+ Jani Lainema <jani.lainema@nokia.com>
+ Nokia Inc. / Nokia Research Center
+ 6000 Connection Drive
+ Irving, TX 75039, USA
+
+ Sebastian Purreiter <sebastian.purreiter@mch.siemens.de>
+ Siemens AG
+ ICM MD MP RD MCH 83
+ P.O.Box 80 17 07
+ D-81617 Munich, Germany
+
+ Thomas Wedi <wedi@tnt.uni-hannover.de>
+ University of Hannover
+ Institut of Communication Theory and Signal Processing
+ Appelstr. 9a
+ 30167 Hannover, Germany
+
+ Guido Heising
+ Fraunhofer-Institute for Telecommunications
+ Heinrich-Hertz-Institut (HHI)
+ Einsteinufer 37
+ 10587 Berlin
+ Germany
+
+ Gabi Blaettermann
+ Fraunhofer-Institute for Telecommunications
+ Heinrich-Hertz-Institut (HHI)
+ Einsteinufer 37
+ 10587 Berlin
+ Germany
+
+ Detlev Marpe <marpe@hhi.de>
+ Fraunhofer-Institute for Telecommunications
+ Heinrich-Hertz-Institut (HHI)
+ Einsteinufer 37
+ 10587 Berlin
+ Germany
+
+ Ragip Kurceren <ragip.kurceren@nokia.com>
+ Nokia Inc. / Nokia Research Center
+ 6000 Connection Drive
+ Irving, TX 75039, USA
+
+ Viktor Varsa <viktor.varsa@nokia.com>
+ Nokia Inc. / Nokia Research Center
+ 6000 Connection Drive
+ Irving, TX 75039, USA
+
+ Ye-Kui Wang <wyk@ieee.org>
+ Tampere University of Technology
+ Tampere International Center for Signal Processing
+ 33720 Tampere, Finland
+
+ Ari Hourunranta <ari.hourunranta@nokia.com>
+ Nokia Corporation / Nokia Mobile Phones
+ P.O. Box 88
+ 33721 Tampere, Finland
+
+ Yann Le Maguet <yann.lemaguet@philips.com>
+ Philips Research France
+
+ Dong Tian <tian@cs.tut.fi>
+ Tampere University of Technology
+ Tampere International Center for Signal Processing
+ 33720 Tampere, Finland
+
+ Miska M. Hannuksela <miska.hannuksela@nokia.com>
+ Nokia Corporation / Nokia Mobile Phones
+ P.O. Box 88
+ 33721 Tampere, Finland
+
+ Karsten Suehring <suehring@hhi.de>
+ Fraunhofer-Institute for Telecommunications
+ Heinrich-Hertz-Institut (HHI)
+ Einsteinufer 37
+ 10587 Berlin
+ Germany
+
+ Heiko Schwarz <hschwarz@hhi.de>
+ Fraunhofer-Institute for Telecommunications
+ Heinrich-Hertz-Institut (HHI)
+ Einsteinufer 37
+ 10587 Berlin
+ Germany
+
+ Tobias Oelbaum <drehvial@gmx.net>
+ Institute for Communications Engineering
+ Munich University of Technology
+ Germany
+
+ Limin Wang <liwang@gi.com>
+ Krit Panusopone <kpanusopone@gi.com>
+ Rajeev Gandhi <rgandhi@gi.com>
+ Yue Yu <yyu@gi.com>
+ Motorola Inc.
+ 6450 Sequence Drive
+ San Diego, CA 92121 USA
+
+ Feng Wu <fengwu@microsoft.com>
+ Xiaoyan Sun <sunxiaoyan@msrchina.research.microsoft.com>
+ Microsoft Research Asia
+ 3/F, Beijing Sigma Center
+ No.49, Zhichun Road, Hai Dian District,
+ Beijing China 100080
+
+ Yoshihiro Kikuchi <yoshihiro.kikuchi@toshiba.co.jp>
+ Takeshi Chujoh <takeshi.chujoh@toshiba.co.jp>
+ Toshiba Corporation
+ Research and Development Center
+ Kawasaki 212-8582, Japan
+
+ Shinya Kadono <kadono@drl.mei.co.jp>
+ Matsushita Electric Industrial Co., Ltd.
+ 1006 Kadoma, Kadoma
+ Osaka 663-8113, Japan
+
+ Dzung Hoang <dthoang@yahoo.com>
+ 10533 Roy Butler Dr.
+ Austin, TX 78717
+
+ Eric Viscito <eric@ev-consulting.com>
+ eV Consulting
+ 52 Tracy Ln
+ Shelburne, VT 05482 USA
+
+ Barry Haskell
+ Apple Computer, Inc. <bhaskell@apple.com>
+ 2 Infinite Loop
+ Cupertino, California 95014
+
+ Greg Conklin
+ RealNetworks, Inc. <gregc@real.com>
+ 2601 Elliott Ave
+ Seattle, WA 98101
+
+ Jill Boyce <jill.boyce@thomson.net>
+ Cristina Gomila <cristina.gomila@thomson.net>
+ Thomson
+ 2 Independence Way
+ Princeton, NJ 08540
+
+ Alexis Michael Tourapis <alexismt@ieee.org><atour@dolby.com>
+ Athanasios Leontaris <aleon@dolby.com>
+ Dolby Laboratories Inc.
+ 3601 West Alameda Ave.
+ Burbank, CA 91505
+
+ Saurav K Bandyopadhyay <saurav@ieee.org>
+ Purvin Pandit <Purvin.Pandit@thomson.net>
+ Zhenyu Wu <Zhenyu.Wu@thomson.net>
+ Thomson Inc.
+ 2 Independence Way
+ Princeton, NJ 08540
+
+ Shun-ichi Sekiguchi <Sekiguchi.Shunichi@eb.MitsubishiElectric.co.jp>
+ Information Technology R&D Center,
+ Mitsubishi Electric Corporation
+ 5-1-1, Ofuna, Kamakura, Japan
+
+ Yung-Lyul Lee <yllee@sejong.ac.kr>
+ Ki-Hun Han <khhan@dms.sejong.ac.kr>
+ Department of Computer Engineering,
+ Sejong University
+ 98 Kunja-Dong, Kwangjin-Gu, Seoul 143-747, Korea
+
+ \endverbatim
+*/
+
diff --git a/Src/h264dec/ldecod/inc/defines.h b/Src/h264dec/ldecod/inc/defines.h
new file mode 100644
index 00000000..edc50563
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/defines.h
@@ -0,0 +1,273 @@
+
+/*!
+ **************************************************************************
+ * \file defines.h
+ *
+ * \brief
+ * Header file containing some useful global definitions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Detlev Marpe
+ * - Karsten Sühring <suehring@hhi.de>
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *
+ * \date
+ * 21. March 2001
+ **************************************************************************
+ */
+
+
+#ifndef H264_DEFINES_H_
+#define H264_DEFINES_H_
+#pragma once
+
+#if defined _DEBUG // NOTE: both branches below currently set TRACE to 0
+# define TRACE 0 //!< 0:Trace off 1:Trace on 2:detailed CABAC context information
+#else
+# define TRACE 0 //!< 0:Trace off 1:Trace on 2:detailed CABAC context information
+#endif
+
+#define JM "16.1 (FRExt)"
+#define VERSION "16.1"
+#define EXT_VERSION "(FRExt)"
+
+#define DUMP_DPB 0 //!< Dump DPB info for debug purposes
+#define PAIR_FIELDS_IN_OUTPUT 0 //!< Pair field pictures for output purposes
+#define IMGTYPE 0 //!< Define imgpel size type. 0 implies byte (cannot handle >8 bit depths) and 1 implies unsigned short
+#define ENABLE_FIELD_CTX 1 //!< Enables Field mode related context types for CABAC
+#define ENABLE_HIGH444_CTX 1 //!< Enables High 444 profile context types for CABAC.
+#define ZEROSNR 0 //!< PSNR computation method
+#define ENABLE_OUTPUT_TONEMAPPING 1 //!< enable tone map the output if tone mapping SEI present
+
+
+#include "typedefs.h"
+
+
+#define H264_MEMORY_ALIGNMENT 32
+
+//#define MAX_NUM_SLICES 150
+#define MAX_NUM_SLICES 50
+#define MAX_REFERENCE_PICTURES 32 //!< H.264 allows 32 fields
+#define MAX_CODED_FRAME_SIZE 8000000 //!< bytes for one frame
+
+//AVC Profile IDC definitions
+#define BASELINE 66 //!< YUV 4:2:0/8 "Baseline"
+#define MAIN 77 //!< YUV 4:2:0/8 "Main"
+#define EXTENDED 88 //!< YUV 4:2:0/8 "Extended"
+#define FREXT_HP 100 //!< YUV 4:2:0/8 "High"
+#define FREXT_Hi10P 110 //!< YUV 4:2:0/10 "High 10"
+#define FREXT_Hi422 122 //!< YUV 4:2:2/10 "High 4:2:2"
+#define FREXT_Hi444 244 //!< YUV 4:4:4/14 "High 4:4:4"
+#define FREXT_CAVLC444 44 //!< YUV 4:4:4/14 "CAVLC 4:4:4"
+
+
+#define FILE_NAME_SIZE 255
+#define INPUT_TEXT_SIZE 1024
+
+#if (ENABLE_HIGH444_CTX == 1)
+# define NUM_BLOCK_TYPES 22
+#else
+# define NUM_BLOCK_TYPES 10
+#endif
+
+
+//#define _LEAKYBUCKET_
+
+#define BLOCK_SHIFT 2
+#define BLOCK_SIZE 4
+#define BLOCK_SIZE_8x8 8
+#define SMB_BLOCK_SIZE 8
+#define BLOCK_PIXELS 16
+#define MB_BLOCK_SIZE 16
+#define MB_PIXELS 256 // MB_BLOCK_SIZE * MB_BLOCK_SIZE
+#define MB_PIXELS_SHIFT 8 // log2(MB_BLOCK_SIZE * MB_BLOCK_SIZE)
+#define MB_BLOCK_SHIFT 4
+#define BLOCK_MULTIPLE 4 // (MB_BLOCK_SIZE/BLOCK_SIZE)
+#define MB_BLOCK_PARTITIONS 16 // (BLOCK_MULTIPLE * BLOCK_MULTIPLE)
+#define BLOCK_CONTEXT 64 // (4 * MB_BLOCK_PARTITIONS)
+
+// These variables relate to the subpel accuracy supported by the software (1/4)
+#define BLOCK_SIZE_SP 16 // BLOCK_SIZE << 2
+#define BLOCK_SIZE_8x8_SP 32 // BLOCK_SIZE_8x8 << 2
+
+// Available MB modes
+enum {
+ PSKIP = 0, //!< same value as BSKIP_DIRECT; IS_SKIP / IS_DIRECT separate the two by slice type
+ BSKIP_DIRECT = 0,
+ P16x16 = 1,
+ P16x8 = 2,
+ P8x16 = 3,
+ SMB8x8 = 4,
+ SMB8x4 = 5,
+ SMB4x8 = 6,
+ SMB4x4 = 7,
+ P8x8 = 8,
+ I4MB = 9,
+ I16MB = 10,
+ IBLOCK = 11,
+ SI4MB = 12,
+ I8MB = 13,
+ IPCM = 14,
+ MAXMODE = 15 //!< one more than the largest mode value listed above
+} ;//MBModeTypes;
+
+// number of intra prediction modes
+#define NO_INTRA_PMODE 9
+
+// Direct Mode types
+enum {
+ DIR_TEMPORAL = 0, //!< Temporal Direct Mode
+ DIR_SPATIAL = 1 //!< Spatial Direct Mode
+} ;//DirectModes;
+
+// CAVLC block types
+enum {
+ LUMA = 0,
+ LUMA_INTRA16x16DC = 1,
+ LUMA_INTRA16x16AC = 2,
+ CB = 3,
+ CB_INTRA16x16DC = 4,
+ CB_INTRA16x16AC = 5,
+ CR = 8, //!< note: values 6 and 7 are not assigned in this enum
+ CR_INTRA16x16DC = 9,
+ CR_INTRA16x16AC = 10
+} ;//CAVLCBlockTypes;
+
+// CABAC block types
+enum {
+ LUMA_16DC = 0,
+ LUMA_16AC = 1,
+ LUMA_8x8 = 2,
+ LUMA_8x4 = 3,
+ LUMA_4x8 = 4,
+ LUMA_4x4 = 5,
+ CHROMA_DC = 6,
+ CHROMA_AC = 7,
+ CHROMA_DC_2x4 = 8,
+ CHROMA_DC_4x4 = 9, //!< last value below NUM_BLOCK_TYPES when ENABLE_HIGH444_CTX != 1 (NUM_BLOCK_TYPES == 10)
+ CB_16DC = 10, //!< values 10..21 are only below NUM_BLOCK_TYPES when ENABLE_HIGH444_CTX == 1 (NUM_BLOCK_TYPES == 22)
+ CB_16AC = 11,
+ CB_8x8 = 12,
+ CB_8x4 = 13,
+ CB_4x8 = 14,
+ CB_4x4 = 15,
+ CR_16DC = 16,
+ CR_16AC = 17,
+ CR_8x8 = 18,
+ CR_8x4 = 19,
+ CR_4x8 = 20,
+ CR_4x4 = 21
+} ;//CABACBlockTypes;
+
+// Macro defines
+#define Q_BITS 15
+#define DQ_BITS 6
+#define Q_BITS_8 16
+#define DQ_BITS_8 6
+
+//#define IS_INTRA(MB) ((MB)->mb_type==I4MB || (MB)->mb_type==I16MB ||(MB)->mb_type==IPCM || (MB)->mb_type==I8MB || (MB)->mb_type==SI4MB)
+#define IS_INTRA(MB) (!!((1 << (MB)->mb_type) & ((1<<I4MB) | (1<<I16MB) | (1<<IPCM) | (1<<I8MB) | (1<<SI4MB)))) // bit-mask form of the test above; mb_type is used as a shift count, so it must be non-negative and smaller than the bit width of int
+#define IS_I16MB(MB) ((MB)->mb_type==I16MB || (MB)->mb_type==IPCM) // note: also true for IPCM
+
+#define IS_INTER(MB) (!IS_INTRA(MB))
+//#define IS_INTER(MB) ((MB)->mb_type!=SI4MB && (MB)->mb_type!=I4MB && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB && (MB)->mb_type!=IPCM)
+#define IS_INTERMV(MB) ((MB)->mb_type!=I4MB && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB && (MB)->mb_type!=0 && (MB)->mb_type!=IPCM) // note: excludes mode 0 but, unlike IS_INTRA, does not test SI4MB
+#define IS_DIRECT(MB) ((MB)->mb_type==0 && (currSlice->slice_type == B_SLICE )) // expands to a use of a variable named currSlice, which must be in scope at the call site
+#define IS_SKIP(MB) ((MB)->mb_type==0 && (currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE)) // expands to a use of a variable named currSlice, which must be in scope at the call site
+
+#define TOTRUN_NUM 15
+#define RUNBEFORE_NUM 7
+#define RUNBEFORE_NUM_M1 6
+
+// Quantization parameter range
+#define MIN_QP 0
+#define MAX_QP 51
+// 4x4 intra prediction modes
+enum {
+ VERT_PRED = 0,
+ HOR_PRED = 1,
+ DC_PRED = 2,
+ DIAG_DOWN_LEFT_PRED = 3,
+ DIAG_DOWN_RIGHT_PRED = 4,
+ VERT_RIGHT_PRED = 5,
+ HOR_DOWN_PRED = 6,
+ VERT_LEFT_PRED = 7,
+ HOR_UP_PRED = 8
+} ;//I4x4PredModes;
+
+// 16x16 intra prediction modes
+enum {
+ VERT_PRED_16 = 0,
+ HOR_PRED_16 = 1,
+ DC_PRED_16 = 2,
+ PLANE_16 = 3
+} ;//I16x16PredModes;
+
+// 8x8 chroma intra prediction modes
+enum {
+ DC_PRED_8 = 0,
+ HOR_PRED_8 = 1,
+ VERT_PRED_8 = 2,
+ PLANE_8 = 3
+} ;//I8x8PredModes;
+
+enum {
+ EOS = 1, //!< End Of Sequence
+ SOP = 2, //!< Start Of Picture
+ SOS = 3 //!< Start Of Slice
+};
+
+// MV Prediction types
+enum {
+ MVPRED_MEDIAN = 0,
+ MVPRED_L = 1,
+ MVPRED_U = 2,
+ MVPRED_UR = 3
+} ;//MVPredTypes;
+
+enum {
+ DECODING_OK = 0,
+ SEARCH_SYNC = 1,
+ PICTURE_DECODED = 2
+};
+
+#define LAMBDA_ACCURACY_BITS 16
+#define INVALIDINDEX (-135792468)
+
+#define RC_MAX_TEMPORAL_LEVELS 5
+
+//Start code and Emulation Prevention need this to be defined in identical manner at encoder and decoder
+#define ZEROBYTES_SHORTSTARTCODE 2 //indicates the number of zero bytes in the short start-code prefix
+
+#define MAX_PLANE 3
+#define IS_INDEPENDENT(IMG) ((IMG)->separate_colour_plane_flag)
+#define IS_FREXT_PROFILE(profile_idc) ( (profile_idc)>=FREXT_HP || (profile_idc) == FREXT_CAVLC444 ) // true for profile_idc >= FREXT_HP or == FREXT_CAVLC444; argument is parenthesized so expression arguments (e.g. a ?: expression) expand correctly
+#define HI_INTRA_ONLY_PROFILE (((p_Vid->active_sps->profile_idc>=FREXT_Hi10P)&&(p_Vid->active_sps->constrained_set3_flag))||(p_Vid->active_sps->profile_idc==FREXT_CAVLC444))
+
+enum
+{
+ VUI_AR_UNDEFINED = 0,
+ VUI_AR_SQUARE = 1, // 1:1
+ VUI_AR_12_11 = 2, // 12:11
+ VUI_AR_10_11 = 3, // 10:11
+ VUI_AR_16_11 = 4, // 16:11
+ VUI_AR_40_33 = 5, // 40:33
+ VUI_AR_24_11 = 6, // 24:11
+ VUI_AR_20_11 = 7, // 20:11
+ VUI_AR_32_11 = 8, // 32:11
+ VUI_AR_80_33 = 9, // 80:33
+ VUI_AR_18_11 = 10, // 18:11
+ VUI_AR_15_11 = 11, // 15:11
+ VUI_AR_64_33 = 12, // 64:33
+VUI_AR_160_99 = 13, // 160:99
+VUI_AR_4_3 = 14, // 4:3
+VUI_AR_3_2 = 15, // 3:2
+VUI_AR_2_1 = 16, // 2:1
+
+
+ VUI_EXTENDED_SAR = 255,
+};
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/elements.h b/Src/h264dec/ldecod/inc/elements.h
new file mode 100644
index 00000000..f115bff6
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/elements.h
@@ -0,0 +1,112 @@
+
+/*!
+ *************************************************************************************
+ * \file elements.h
+ *
+ * \brief
+ * Header file for elements in H.264 streams
+ *
+ * \date
+ * 6.10.2000
+ *
+ * \version
+ * 1.0
+ *
+ * \author
+ * Sebastian Purreiter <sebastian.purreiter@mch.siemens.de> \n
+ * Siemens AG, Information and Communication Mobile \n
+ * P.O.Box 80 17 07 \n
+ * D-81617 Munich, Germany \n
+ *************************************************************************************
+ */
+
+#ifndef _ELEMENTS_H_
+#define _ELEMENTS_H_
+
+/*!
+ * definition of H.264 syntax elements
+ * order of elements follow dependencies for picture reconstruction
+ */
+/*!
+ * \brief Assignment of old TYPE partition elements to new
+ * elements
+ *
+ * old element | new elements
+ * ----------------+-------------------------------------------------------------------
+ * TYPE_HEADER | SE_HEADER, SE_PTYPE
+ * TYPE_MBHEADER | SE_MBTYPE, SE_REFFRAME, SE_INTRAPREDMODE
+ * TYPE_MVD | SE_MVD
+ * TYPE_CBP | SE_CBP_INTRA, SE_CBP_INTER
+ * SE_DELTA_QUANT_INTER
+ * SE_DELTA_QUANT_INTRA
+ * TYPE_COEFF_Y | SE_LUM_DC_INTRA, SE_LUM_AC_INTRA, SE_LUM_DC_INTER, SE_LUM_AC_INTER
+ * TYPE_2x2DC | SE_CHR_DC_INTRA, SE_CHR_DC_INTER
+ * TYPE_COEFF_C | SE_CHR_AC_INTRA, SE_CHR_AC_INTER
+ * TYPE_EOS | SE_EOS
+*/
+
+#define SE_HEADER 0
+#define SE_PTYPE 1
+#define SE_MBTYPE 2
+#define SE_REFFRAME 3
+#define SE_INTRAPREDMODE 4
+#define SE_MVD 5
+#define SE_CBP_INTRA 6
+#define SE_LUM_DC_INTRA 7
+#define SE_CHR_DC_INTRA 8
+#define SE_LUM_AC_INTRA 9
+#define SE_CHR_AC_INTRA 10
+#define SE_CBP_INTER 11
+#define SE_LUM_DC_INTER 12
+#define SE_CHR_DC_INTER 13
+#define SE_LUM_AC_INTER 14
+#define SE_CHR_AC_INTER 15
+#define SE_DELTA_QUANT_INTER 16
+#define SE_DELTA_QUANT_INTRA 17
+#define SE_BFRAME 18
+#define SE_EOS 19
+#define SE_MAX_ELEMENTS 20
+
+
+#define NO_EC 0 //!< no error concealment necessary
+#define EC_REQ 1 //!< error concealment required
+#define EC_SYNC 2 //!< search and sync on next header element
+
+#define MAXPARTITIONMODES 2 //!< maximum possible partition modes as defined in assignSE2partition[][]
+
+/*!
+ * \brief lookup-table to assign different elements to partition
+ *
+ * \note here we defined up to 6 different partitions similar to
+ * document Q15-k-18 described in the PROGFRAMEMODE.
+ * The Sliceheader contains the PSYNC information. \par
+ *
+ * Elements inside a partition are not reordered; they
+ * appear in order of occurrence in the stream.
+ * Assumption: Only partition losses are considered. \par
+ *
+ * The texture elements luminance and chrominance are
+ * not ordered in the progressive form
+ * This may be changed in image.c \par
+ *
+ * We also defined the proposed internet partition mode
+ * of Stephan Wenger here. To select the desired mode
+ * uncomment one of the two following lines. \par
+ *
+ * -IMPORTANT:
+ * Picture- or Sliceheaders must be assigned to partition 0. \par
+ * Furthermore partitions must follow syntax dependencies as
+ * outlined in document Q15-J-23.
+ */
+
+
+static const byte assignSE2partition[MAXPARTITIONMODES][SE_MAX_ELEMENTS] = // one row per partition mode; first dimension tied to MAXPARTITIONMODES so adding a row without raising the macro is rejected by the compiler
+{
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 // element number (do not uncomment)
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, //!< all elements in one partition no data partitioning
+ { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0 } //!< three partitions per slice
+};
+
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/erc_api.h b/Src/h264dec/ldecod/inc/erc_api.h
new file mode 100644
index 00000000..428c4ed3
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/erc_api.h
@@ -0,0 +1,159 @@
+
+/*!
+ ************************************************************************
+ * \file erc_api.h
+ *
+ * \brief
+ * External (still inside video decoder) interface for error concealment module
+ *
+ * \author
+ * - Ari Hourunranta <ari.hourunranta@nokia.com>
+ * - Ye-Kui Wang <wyk@ieee.org>
+ * - Jill Boyce <jill.boyce@thomson.net>
+ * - Saurav K Bandyopadhyay <saurav@ieee.org>
+ * - Zhenyu Wu <Zhenyu.Wu@thomson.net>
+ * - Purvin Pandit <Purvin.Pandit@thomson.net>
+ *
+ * ************************************************************************
+ */
+
+
+#ifndef _ERC_API_H_
+#define _ERC_API_H_
+
+#include "erc_globals.h"
+
+/*
+* Defines
+*/
+
+/* If the average motion vector of the correctly received macroblocks is less than the
+threshold, concealByCopy is used, otherwise concealByTrial is used. */
+#define MVPERMB_THR 8
+
+/* used to determine the size of the allocated memory for a temporal Region (MB) */
+#define DEF_REGION_SIZE 384 /* 8*8*6 */
+
+#define ERC_BLOCK_OK 3
+#define ERC_BLOCK_CONCEALED 2
+#define ERC_BLOCK_CORRUPTED 1
+#define ERC_BLOCK_EMPTY 0
+
+
+/*
+* Functions to convert MBNum representation to blockNum
+*/
+
+#define xPosYBlock(currYBlockNum,picSizeX) \
+((currYBlockNum)%((picSizeX)>>3))
+
+#define yPosYBlock(currYBlockNum,picSizeX) \
+((currYBlockNum)/((picSizeX)>>3))
+
+#define xPosMB(currMBNum,picSizeX) \
+((currMBNum)%((picSizeX)>>4))
+
+#define yPosMB(currMBNum,picSizeX) \
+((currMBNum)/((picSizeX)>>4))
+
+#define MBxy2YBlock(currXPos,currYPos,comp,picSizeX) \
+((((currYPos)<<1)+((comp)>>1))*((picSizeX)>>3)+((currXPos)<<1)+((comp)&1))
+
+#define MBNum2YBlock(currMBNum,comp,picSizeX) \
+MBxy2YBlock(xPosMB((currMBNum),(picSizeX)),yPosMB((currMBNum),(picSizeX)),(comp),(picSizeX))
+
+
+/*
+* typedefs
+*/
+
+/* segment data structure */
+typedef struct ercSegment_s
+{
+ int startMBPos;
+ int endMBPos;
+ int fCorrupted;
+} ercSegment_t;
+
+/* Error detector & concealment instance data structure */
+typedef struct ercVariables_s
+{
+ /* Number of macroblocks (size or size/4 of the arrays) */
+ int nOfMBs;
+ /* Number of segments (slices) in frame */
+ int nOfSegments;
+
+ /* Array for conditions of Y blocks */
+ int *yCondition;
+ /* Array for conditions of U blocks */
+ int *uCondition;
+ /* Array for conditions of V blocks */
+ int *vCondition;
+
+ /* Array for Slice level information */
+ ercSegment_t *segments;
+ int currSegment;
+
+ /* Conditions of the MBs of the previous frame */
+ int *prevFrameYCondition;
+
+ /* Flag telling if the current segment was found to be corrupted */
+ int currSegmentCorrupted;
+ /* Counter for corrupted segments per picture */
+ int nOfCorruptedSegments;
+
+ /* State variables for error detector and concealer */
+ int concealment;
+
+} ercVariables_t;
+
+/*
+* External function interface
+*/
+
+void ercInit(VideoParameters *p_Vid, int pic_sizex, int pic_sizey, int flag);
+ercVariables_t *ercOpen( void );
+void ercReset( ercVariables_t *errorVar, int nOfMBs, int numOfSegments, int picSizeX );
+void ercClose( VideoParameters *p_Vid, ercVariables_t *errorVar );
+void ercSetErrorConcealment( ercVariables_t *errorVar, int value );
+
+void ercStartSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar );
+void ercStopSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar );
+void ercMarkCurrSegmentLost(int picSizeX, ercVariables_t *errorVar );
+void ercMarkCurrSegmentOK(int picSizeX, ercVariables_t *errorVar );
+void ercMarkCurrMBConcealed( int currMBNum, int comp, int picSizeX, ercVariables_t *errorVar );
+
+int ercConcealIntraFrame( VideoParameters *p_Vid, frame *recfr, int picSizeX, int picSizeY, ercVariables_t *errorVar );
+int ercConcealInterFrame( frame *recfr, objectBuffer_t *object_list,
+ int picSizeX, int picSizeY, ercVariables_t *errorVar, int chroma_format_idc );
+
+
+/* Thomson APIs for concealing entire frame loss */
+
+#include "mbuffer.h"
+#include "output.h"
+
+struct concealment_node {
+ StorablePicture* picture;
+ int missingpocs;
+ struct concealment_node *next;
+};
+
+extern struct concealment_node * init_node(StorablePicture* , int );
+extern void print_node( struct concealment_node * );
+extern void print_list( struct concealment_node * );
+extern void init_lists_for_non_reference_loss(VideoParameters *p_Vid, int , PictureStructure );
+
+extern void conceal_non_ref_pics(VideoParameters *p_Vid, int diff);
+extern void conceal_lost_frames(VideoParameters *p_Vid);
+
+extern void sliding_window_poc_management(DecodedPictureBuffer *p_Dpb, StorablePicture *p);
+
+extern void write_lost_non_ref_pic(VideoParameters *p_Vid, int poc);
+extern void write_lost_ref_after_idr(VideoParameters *p_Vid, int pos);
+
+extern int comp(const void *, const void *);
+
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/erc_do.h b/Src/h264dec/ldecod/inc/erc_do.h
new file mode 100644
index 00000000..9879222f
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/erc_do.h
@@ -0,0 +1,44 @@
+
+/*!
+ ************************************************************************
+ * \file erc_do.h
+ *
+ * \brief
+ * Header for the I & P frame error concealment common functions
+ *
+ * \author
+ * - Viktor Varsa <viktor.varsa@nokia.com>
+ * - Ye-Kui Wang <wyk@ieee.org>
+ *
+ ************************************************************************
+ */
+
+#ifndef _ERC_DO_H_
+#define _ERC_DO_H_
+
+
+#include "erc_api.h"
+
+void ercPixConcealIMB (VideoParameters *p_Vid, imgpel *currFrame, int row, int column, int predBlocks[], int frameWidth, int mbWidthInBlocks);
+
+int ercCollect8PredBlocks( int predBlocks[], int currRow, int currColumn, int *condition,
+ int maxRow, int maxColumn, int step, byte fNoCornerNeigh );
+int ercCollectColumnBlocks( int predBlocks[], int currRow, int currColumn, int *condition, int maxRow, int maxColumn, int step );
+
+#define isSplitted(object_list,currMBNum) \
+ (((object_list)+((currMBNum)<<2))->regionMode >= REGMODE_SPLITTED) /* true when entry (currMBNum<<2) of object_list has regionMode >= REGMODE_SPLITTED; object_list is parenthesized so any pointer expression may be passed */
+
+/* this can be used as isBlock(...,INTRA) or isBlock(...,INTER_COPY); regMode is token-pasted onto REGMODE_ (with an _8x8 suffix for split entries), object_list may be any pointer expression */
+#define isBlock(object_list,currMBNum,comp,regMode) \
+ (isSplitted(object_list,currMBNum) ? \
+ (((object_list)+((currMBNum)<<2)+(comp))->regionMode == REGMODE_##regMode##_8x8) : \
+ (((object_list)+((currMBNum)<<2))->regionMode == REGMODE_##regMode))
+
+/* this can be used as getParam(...,mv) or getParam(...,xMin) or getParam(...,yMin); reads the member from entry (currMBNum<<2)+comp when the entry is split, else from entry (currMBNum<<2); object_list may be any pointer expression */
+#define getParam(object_list,currMBNum,comp,param) \
+ (isSplitted(object_list,currMBNum) ? \
+ (((object_list)+((currMBNum)<<2)+(comp))->param) : \
+ (((object_list)+((currMBNum)<<2))->param))
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/erc_globals.h b/Src/h264dec/ldecod/inc/erc_globals.h
new file mode 100644
index 00000000..63ba4e2e
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/erc_globals.h
@@ -0,0 +1,52 @@
+
+/*!
+ ************************************************************************
+ * \file erc_globals.h
+ *
+ * \brief
+ * global header file for error concealment module
+ *
+ * \author
+ * - Viktor Varsa <viktor.varsa@nokia.com>
+ * - Ye-Kui Wang <wyk@ieee.org>
+ ************************************************************************
+ */
+
+#ifndef _ERC_GLOBALS_H_
+#define _ERC_GLOBALS_H_
+
+#include "defines.h"
+
+/* "block" means an 8x8 pixel area */
+
+/* Region modes */
+#define REGMODE_INTER_COPY 0 //!< Copy region
+#define REGMODE_INTER_PRED 1 //!< Inter region with motion vectors
+#define REGMODE_INTRA 2 //!< Intra region
+#define REGMODE_SPLITTED 3 //!< Any region mode higher than this indicates that the region
+ //!< is splitted which means 8x8 block
+#define REGMODE_INTER_COPY_8x8 4
+#define REGMODE_INTER_PRED_8x8 5
+#define REGMODE_INTRA_8x8 6
+
+//! YUV pixel domain image arrays for a video frame
+typedef struct frame_s
+{
+ VideoParameters *p_Vid;
+ imgpel *yptr;
+ imgpel *uptr;
+ imgpel *vptr;
+} frame;
+
+//! region structure stores information about a region that is needed for concealment
+typedef struct object_buffer
+{
+ byte regionMode; //!< region mode as above
+ int xMin; //!< X coordinate of the pixel position of the top-left corner of the region
+ int yMin; //!< Y coordinate of the pixel position of the top-left corner of the region
+ short mv[3]; //!< motion vectors in 1/4 pixel units: mvx = mv[0], mvy = mv[1],
+ //!< and ref_frame = mv[2]
+} objectBuffer_t;
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/errorconcealment.h b/Src/h264dec/ldecod/inc/errorconcealment.h
new file mode 100644
index 00000000..36650e25
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/errorconcealment.h
@@ -0,0 +1,20 @@
+
+
+/*!
+ ****************************************************************************
+ * \file errorconcealment.h
+ *
+ * \brief
+ * Header file for errorconcealment.c
+ *
+ ****************************************************************************
+ */
+
+#ifndef _ERRORCONCEALMENT_H_
+#define _ERRORCONCEALMENT_H_
+
+extern int set_ec_flag(VideoParameters *p_Vid, int se);
+extern void reset_ec_flags(VideoParameters *p_Vid);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/fmo.h b/Src/h264dec/ldecod/inc/fmo.h
new file mode 100644
index 00000000..df749bba
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/fmo.h
@@ -0,0 +1,30 @@
+
+/*!
+ ***************************************************************************
+ *
+ * \file fmo.h
+ *
+ * \brief
+ * Support for Flexible Macroblock Ordering (FMO)
+ *
+ * \date
+ * 19 June, 2002
+ *
+ * \author
+ * Stephan Wenger stewe@cs.tu-berlin.de
+ **************************************************************************/
+
+#ifndef _FMO_H_
+#define _FMO_H_
+
+
+extern int fmo_init (VideoParameters *p_Vid);
+extern int FmoFinit (VideoParameters *p_Vid);
+
+extern int FmoGetNumberOfSliceGroup(VideoParameters *p_Vid);
+extern int FmoGetLastMBOfPicture (VideoParameters *p_Vid);
+extern int FmoGetLastMBInSliceGroup(VideoParameters *p_Vid, int SliceGroup);
+extern int FmoGetSliceGroupId (VideoParameters *p_Vid, int mb);
+extern int FmoGetNextMBNr (VideoParameters *p_Vid, int CurrentMbNr);
+
+#endif
diff --git a/Src/h264dec/ldecod/inc/global.h b/Src/h264dec/ldecod/inc/global.h
new file mode 100644
index 00000000..6d2677e6
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/global.h
@@ -0,0 +1,1230 @@
+
+/*!
+ ************************************************************************
+ * \file
+ * global.h
+ * \brief
+ * global definitions for H.264 decoder.
+ * \author
+ * Copyright (C) 1999 Telenor Satellite Services,Norway
+ * Ericsson Radio Systems, Sweden
+ *
+ * Inge Lille-Langoy <inge.lille-langoy@telenor.com>
+ *
+ * Telenor Satellite Services
+ * Keysers gt.13 tel.: +47 23 13 86 98
+ * N-0130 Oslo,Norway fax.: +47 22 77 79 80
+ *
+ * Rickard Sjoberg <rickard.sjoberg@era.ericsson.se>
+ *
+ * Ericsson Radio Systems
+ * KI/ERA/T/VV
+ * 164 80 Stockholm, Sweden
+ *
+ ************************************************************************
+ */
+#ifndef _GLOBAL_H_
+#define _GLOBAL_H_
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+#include <sys/timeb.h>
+
+#include <bfc/platform/types.h>
+#include "win32.h"
+#include "defines.h"
+#include "ifunctions.h"
+#include "parsetcommon.h"
+#include "types.h"
+#include "frame.h"
+#include "nalucommon.h"
+#include "memcache.h"
+#include <mmintrin.h>
+#ifdef H264_IPP
+//#include "../tools/staticlib/ipp_px.h"
+#include "ippdefs.h"
+#include "ippcore.h"
+#include "ipps.h"
+#include "ippi.h"
+#include "ippvc.h"
+#endif
+/* benski> not the best place for this but it works for now */
+#ifdef _M_IX86
+// byte count i must be a non-zero multiple of 32 (each loop pass stores 32 bytes)
+#pragma warning(disable: 4799)
+static inline void memzero_cache32(void *dst, unsigned long i)
+{
+
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+loopwrite:
+
+ movq 0[edi], mm0
+ movq 8[edi], mm0
+ movq 16[edi], mm0
+ movq 24[edi], mm0
+ lea edi, [edi+32]
+ sub i, 32
+ jg loopwrite
+
+ }
+}
+
+static inline void memzero_fast32(void *dst, unsigned long i)
+{
+
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+loopwrite:
+
+ movntq 0[edi], mm0
+ movntq 8[edi], mm0
+ movntq 16[edi], mm0
+ movntq 24[edi], mm0
+
+ lea edi, [edi+32]
+ sub i, 32
+ jg loopwrite
+
+ }
+}
+
+static inline void memzero64(void *dst)
+{
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+ movq 0[edi], mm0
+ movq 8[edi], mm0
+ movq 16[edi], mm0
+ movq 24[edi], mm0
+ movq 32[edi], mm0
+ movq 40[edi], mm0
+ movq 48[edi], mm0
+ movq 56[edi], mm0
+ }
+}
+
+static inline void memzero128(void *dst)
+{
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+ movq 0[edi], mm0
+ movq 8[edi], mm0
+ movq 16[edi], mm0
+ movq 24[edi], mm0
+ movq 32[edi], mm0
+ movq 40[edi], mm0
+ movq 48[edi], mm0
+ movq 56[edi], mm0
+ movq 64[edi], mm0
+ movq 72[edi], mm0
+ movq 80[edi], mm0
+ movq 88[edi], mm0
+ movq 96[edi], mm0
+ movq 104[edi], mm0
+ movq 112[edi], mm0
+ movq 120[edi], mm0
+ }
+}
+
+static inline void memzero24(void *dst)
+{
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+ movq 0[edi], mm0
+ movq 8[edi], mm0
+ movq 16[edi], mm0
+ }
+}
+
+static inline void memzero48(void *dst)
+{
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+ movq 0[edi], mm0
+ movq 8[edi], mm0
+ movq 16[edi], mm0
+ movq 24[edi], mm0
+ movq 32[edi], mm0
+ movq 40[edi], mm0
+ }
+}
+
+static inline void memzero16(void *dst)
+{
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+ movq 0[edi], mm0
+ movq 8[edi], mm0
+ }
+}
+
+static inline void memzero8(void *dst)
+{
+ __asm {
+ pxor mm0, mm0
+ mov edi, dst
+
+ movq 0[edi], mm0
+ }
+}
+
+static inline void memset_fast_end()
+{
+ _mm_empty();
+}
+
+// Very optimized memcpy() routine for all AMD Athlon and Duron family.
+// This code uses any of FOUR different basic copy methods, depending
+// on the transfer size.
+// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
+// "Streaming Store"), and also uses the software prefetchnta instructions,
+// be sure youre running on Athlon/Duron or other recent CPU before calling!
+
+#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
+// The smallest copy uses the X86 "movsd" instruction, in an optimized
+// form which is an "unrolled loop".
+
+#define IN_CACHE_COPY 64 * 1024 // upper limit for movq/movq copy w/SW prefetch
+// Next is a copy that uses the MMX registers to copy 8 bytes at a time,
+// also using the "unrolled loop" optimization. This code uses
+// the software prefetch instruction to get the data into the cache.
+
+#define UNCACHED_COPY 197 * 1024 // upper limit for movq/movntq w/SW prefetch
+// For larger blocks, which will spill beyond the cache, its faster to
+// use the Streaming Store instruction MOVNTQ. This write instruction
+// bypasses the cache and writes straight to main memory. This code also
+// uses the software prefetch instruction to pre-read the data.
+// USE 64 * 1024 FOR THIS VALUE IF YOURE ALWAYS FILLING A "CLEAN CACHE"
+
+#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
+#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
+// For the largest size blocks, a special technique called Block Prefetch
+// can be used to accelerate the read operations. Block Prefetch reads
+// one address per cache line, for a series of cache lines, in a short loop.
+// This is faster than using software prefetch. The technique is great for
+// getting maximum read bandwidth, especially in DDR memory systems.
+
+// Inline assembly syntax for use with Visual C++
+
+
+static void * memcpy_amd(void *dest, const void *src, size_t n)
+{
+ __asm {
+
+ mov ecx, [n] // number of bytes to copy
+ mov edi, [dest] // destination
+ mov esi, [src] // source
+ mov ebx, ecx // keep a copy of count
+
+ cld
+ cmp ecx, TINY_BLOCK_COPY
+ jb $memcpy_ic_3 // tiny? skip mmx copy
+
+ cmp ecx, 32*1024 // dont align between 32k-64k because
+ jbe $memcpy_do_align // it appears to be slower
+ cmp ecx, 64*1024
+ jbe $memcpy_align_done
+$memcpy_do_align:
+ mov ecx, 8 // a trick thats faster than rep movsb...
+ sub ecx, edi // align destination to qword
+ and ecx, 111b // get the low bits
+ sub ebx, ecx // update copy count
+ neg ecx // set up to jump into the array
+ add ecx, offset $memcpy_align_done
+ jmp ecx // jump to array of movsbs
+
+align 4
+ movsb
+ movsb
+ movsb
+ movsb
+ movsb
+ movsb
+ movsb
+ movsb
+
+$memcpy_align_done: // destination is dword aligned
+ mov ecx, ebx // number of bytes left to copy
+ shr ecx, 6 // get 64-byte block count
+ jz $memcpy_ic_2 // finish the last few bytes
+
+ cmp ecx, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
+ jae $memcpy_uc_test
+
+// This is small block copy that uses the MMX registers to copy 8 bytes
+// at a time. It uses the "unrolled loop" optimization, and also uses
+// the software prefetch instruction to get the data into the cache.
+align 16
+$memcpy_ic_1: // 64-byte block copies, in-cache copy
+
+ prefetchnta [esi + (200*64/34+192)] // start reading ahead
+
+ movq mm0, [esi+0] // read 64 bits
+ movq mm1, [esi+8]
+ movq [edi+0], mm0 // write 64 bits
+ movq [edi+8], mm1 // note: the normal movq writes the
+ movq mm2, [esi+16] // data to cache// a cache line will be
+ movq mm3, [esi+24] // allocated as needed, to store the data
+ movq [edi+16], mm2
+ movq [edi+24], mm3
+ movq mm0, [esi+32]
+ movq mm1, [esi+40]
+ movq [edi+32], mm0
+ movq [edi+40], mm1
+ movq mm2, [esi+48]
+ movq mm3, [esi+56]
+ movq [edi+48], mm2
+ movq [edi+56], mm3
+
+ add esi, 64 // update source pointer
+ add edi, 64 // update destination pointer
+ dec ecx // count down
+ jnz $memcpy_ic_1 // last 64-byte block?
+
+$memcpy_ic_2:
+ mov ecx, ebx // has valid low 6 bits of the byte count
+$memcpy_ic_3:
+ shr ecx, 2 // dword count
+ and ecx, 1111b // only look at the "remainder" bits
+ neg ecx // set up to jump into the array
+ add ecx, offset $memcpy_last_few
+ jmp ecx // jump to array of movsds
+
+$memcpy_uc_test:
+ cmp ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy
+ jae $memcpy_bp_1
+
+$memcpy_64_test:
+ or ecx, ecx // _tail end of block prefetch will jump here
+ jz $memcpy_ic_2 // no more 64-byte blocks left
+
+// For larger blocks, which will spill beyond the cache, its faster to
+// use the Streaming Store instruction MOVNTQ. This write instruction
+// bypasses the cache and writes straight to main memory. This code also
+// uses the software prefetch instruction to pre-read the data.
+align 16
+$memcpy_uc_1: // 64-byte blocks, uncached copy
+
+ prefetchnta [esi + (200*64/34+192)] // start reading ahead
+
+ movq mm0,[esi+0] // read 64 bits
+ add edi,64 // update destination pointer
+ movq mm1,[esi+8]
+ add esi,64 // update source pointer
+ movq mm2,[esi-48]
+ movntq [edi-64], mm0 // write 64 bits, bypassing the cache
+ movq mm0,[esi-40] // note: movntq also prevents the CPU
+ movntq [edi-56], mm1 // from READING the destination address
+ movq mm1,[esi-32] // into the cache, only to be over-written
+ movntq [edi-48], mm2 // so that also helps performance
+ movq mm2,[esi-24]
+ movntq [edi-40], mm0
+ movq mm0,[esi-16]
+ movntq [edi-32], mm1
+ movq mm1,[esi-8]
+ movntq [edi-24], mm2
+ movntq [edi-16], mm0
+ dec ecx
+ movntq [edi-8], mm1
+ jnz $memcpy_uc_1 // last 64-byte block?
+
+ jmp $memcpy_ic_2 // almost done
+
+// For the largest size blocks, a special technique called Block Prefetch
+// can be used to accelerate the read operations. Block Prefetch reads
+// one address per cache line, for a series of cache lines, in a short loop.
+// This is faster than using software prefetch, in this case.
+// The technique is great for getting maximum read bandwidth,
+// especially in DDR memory systems.
+$memcpy_bp_1: // large blocks, block prefetch copy
+
+ cmp ecx, CACHEBLOCK // big enough to run another prefetch loop?
+ jl $memcpy_64_test // no, back to regular uncached copy
+
+ mov eax, CACHEBLOCK / 2 // block prefetch loop, unrolled 2X
+ add esi, CACHEBLOCK * 64 // move to the top of the block
+align 16
+$memcpy_bp_2:
+ mov edx, [esi-64] // grab one address per cache line
+ mov edx, [esi-128] // grab one address per cache line
+ sub esi, 128 // go reverse order
+ dec eax // count down the cache lines
+ jnz $memcpy_bp_2 // keep grabbing more lines into cache
+
+ mov eax, CACHEBLOCK // now that its in cache, do the copy
+align 16
+$memcpy_bp_3:
+ movq mm0, [esi ] // read 64 bits
+ movq mm1, [esi+ 8]
+ movq mm2, [esi+16]
+ movq mm3, [esi+24]
+ movq mm4, [esi+32]
+ movq mm5, [esi+40]
+ movq mm6, [esi+48]
+ movq mm7, [esi+56]
+ add esi, 64 // update source pointer
+ movntq [edi ], mm0 // write 64 bits, bypassing cache
+ movntq [edi+ 8], mm1 // note: movntq also prevents the CPU
+ movntq [edi+16], mm2 // from READING the destination address
+ movntq [edi+24], mm3 // into the cache, only to be over-written,
+ movntq [edi+32], mm4 // so that also helps performance
+ movntq [edi+40], mm5
+ movntq [edi+48], mm6
+ movntq [edi+56], mm7
+ add edi, 64 // update dest pointer
+
+ dec eax // count down
+
+ jnz $memcpy_bp_3 // keep copying
+ sub ecx, CACHEBLOCK // update the 64-byte block count
+ jmp $memcpy_bp_1 // keep processing chunks
+
+// The smallest copy uses the X86 "movsd" instruction, in an optimized
+// form which is an "unrolled loop". Then it handles the last few bytes.
+align 4
+ movsd
+ movsd // perform last 1-15 dword copies
+ movsd
+ movsd
+ movsd
+ movsd
+ movsd
+ movsd
+ movsd
+ movsd // perform last 1-7 dword copies
+ movsd
+ movsd
+ movsd
+ movsd
+ movsd
+ movsd
+
+$memcpy_last_few: // dword aligned from before movsds
+ mov ecx, ebx // has valid low 2 bits of the byte count
+ and ecx, 11b // the last few cows must come home
+ jz $memcpy_final // no more, lets leave
+ rep movsb // the last 1, 2, or 3 bytes
+
+$memcpy_final:
+// emms // clean up the MMX state
+ sfence // flush the write buffer
+ mov eax, [dest] // ret value = destination pointer
+
+ }
+}
+
+#elif defined(_M_X64)
+// Zero exactly 24 bytes at dst; portable stand-in for the MMX memzero24.
+static inline void memzero24(void *dst)
+{
+ // The old loop ran an int32_t index j = 0,4,...,20 and stored d[j], so it
+ // cleared one word in four and wrote as far as byte offset 83, past the
+ // 24 bytes the caller owns. memset() (from <string.h>, already included
+ // by this header) clears exactly the intended range with no alignment need.
+ memset(dst, 0, 24);
+}
+static inline void memset_fast_end() {}
+#else
+// Zero the first i bytes at dst (i taken as a byte count, like memzero_fast32).
+static inline void memzero_fast16(void *dst, unsigned long i)
+{
+ // The old loop advanced a byte-style counter (j += 4 while j < i) but used
+ // it as an int32_t index, so it cleared only every fourth word and wrote
+ // to roughly four times the requested length. It also compared a signed
+ // counter against the unsigned length. memset() covers exactly i bytes.
+ memset(dst, 0, i);
+}
+// Zero exactly 24 bytes at dst; portable stand-in for the MMX memzero24.
+static inline void memzero24(void *dst)
+{
+ // The old loop ran an int32_t index j = 0,4,...,20 and stored d[j], so it
+ // cleared one word in four and wrote as far as byte offset 83, past the
+ // 24 bytes the caller owns. memset() (from <string.h>, already included
+ // by this header) clears exactly the intended range with no alignment need.
+ memset(dst, 0, 24);
+}
+static inline void memset_fast_end() {}
+#endif
+
+#define UNDEFINED_REFERENCE ((int)0x80000000)
+typedef int32_t h264_ref_t;
+
+#define ET_SIZE 300 //!< size of error text buffer
+extern char errortext[ET_SIZE]; //!< buffer for error message for exit with error()
+extern int sse2_flag, mmx_flag, sse_flag, sse3_flag, sse4_1_flag;
+/***********************************************************************
+ * T y p e d e f i n i t i o n s f o r J M
+ ***********************************************************************
+ */
+
+typedef enum
+{
+ LumaComp = 0,
+ CrComp = 1,
+ CbComp = 2
+} Color_Component;
+
+/***********************************************************************
+ * D a t a t y p e s f o r C A B A C
+ ***********************************************************************
+ */
+
+typedef struct pix_pos
+{
+ int available;
+ int mb_addr;
+ short x;
+ short y;
+ short pos_x;
+ short pos_y;
+} PixelPos;
+
+//! struct to characterize the state of the arithmetic coding engine
+typedef struct
+{
+ unsigned int Drange;
+ unsigned int Dvalue;
+ int DbitsLeft;
+ byte *Dcodestrm;
+ int *Dcodestrm_len;
+} DecodingEnvironment;
+
+typedef DecodingEnvironment *DecodingEnvironmentPtr;
+
+typedef short MotionVector[2];
+
+//! definition of motion parameters
+typedef struct pic_motion
+{
+ h264_ref_t ref_pic_id;
+ h264_ref_t ref_id;
+ MotionVector mv;
+ char ref_idx;
+} PicMotion;
+
+// TODO: benski> might be more efficient to make a [list][subblock_y][subblock_x] array of these values instead of parallel arrays
+typedef struct motion_params
+{
+ PicMotion **motion[2];
+ byte ** moving_block;
+} MotionParams;
+
+//! struct for context management
+typedef struct
+{
+ uint16_t state; // index into state-table CP
+ unsigned char MPS; // Most Probable Symbol 0/1 CP
+ unsigned char dummy; // for alignment
+} BiContextType;
+
+typedef BiContextType *BiContextTypePtr;
+
+
+/**********************************************************************
+ * C O N T E X T S F O R T M L S Y N T A X E L E M E N T S
+ **********************************************************************
+ */
+
+#define NUM_MB_TYPE_CTX 11
+#define NUM_B8_TYPE_CTX 9
+#define NUM_MV_RES_CTX 10
+#define NUM_REF_NO_CTX 6
+#define NUM_DELTA_QP_CTX 4
+#define NUM_MB_AFF_CTX 4
+#define NUM_TRANSFORM_SIZE_CTX 3
+
+// structures that will be declared somewhere else
+struct storable_picture;
+struct datapartition;
+struct syntaxelement;
+
+typedef struct
+{
+ BiContextType mb_type_contexts [3][NUM_MB_TYPE_CTX];
+ BiContextType b8_type_contexts [2][NUM_B8_TYPE_CTX];
+ BiContextType mv_res_contexts [2][NUM_MV_RES_CTX];
+ BiContextType ref_no_contexts [2][NUM_REF_NO_CTX];
+ BiContextType delta_qp_contexts[NUM_DELTA_QP_CTX];
+ BiContextType mb_aff_contexts [NUM_MB_AFF_CTX];
+} MotionInfoContexts;
+
+#define NUM_IPR_CTX 2
+#define NUM_CIPR_CTX 4
+#define NUM_CBP_CTX 4
+#define NUM_BCBP_CTX 4
+#define NUM_MAP_CTX 15
+#define NUM_LAST_CTX 15
+#define NUM_ONE_CTX 5
+#define NUM_ABS_CTX 5
+
+
+typedef struct
+{
+ BiContextType transform_size_contexts [NUM_TRANSFORM_SIZE_CTX];
+ BiContextType ipr_contexts [NUM_IPR_CTX];
+ BiContextType cipr_contexts[NUM_CIPR_CTX];
+ BiContextType cbp_contexts [3][NUM_CBP_CTX];
+ BiContextType bcbp_contexts[NUM_BLOCK_TYPES][NUM_BCBP_CTX];
+ BiContextType map_contexts [2][NUM_BLOCK_TYPES][NUM_MAP_CTX+1]; // +1 for better alignment
+ BiContextType last_contexts[2][NUM_BLOCK_TYPES][NUM_LAST_CTX+1]; // +1 for better alignment
+ BiContextType one_contexts [NUM_BLOCK_TYPES][NUM_ONE_CTX];
+ BiContextType abs_contexts [NUM_BLOCK_TYPES][NUM_ABS_CTX];
+} TextureInfoContexts;
+
+
+//*********************** end of data type definition for CABAC *******************
+
+/***********************************************************************
+ * N e w D a t a t y p e s f o r T M L
+ ***********************************************************************
+ */
+
+/*! Buffer structure for decoded reference picture marking commands */
+typedef struct DecRefPicMarking_s
+{
+ int memory_management_control_operation;
+ int difference_of_pic_nums_minus1;
+ int long_term_pic_num;
+ int long_term_frame_idx;
+ int max_long_term_frame_idx_plus1;
+ struct DecRefPicMarking_s *Next;
+} DecRefPicMarking_t;
+
+
+//! definition of pic motion parameters
+typedef struct pic_motion_params2
+{
+ h264_ref_t ref_pic_id; //!< reference picture identifier [list][subblock_y][subblock_x]
+ h264_ref_t ref_id; //!< reference picture identifier [list][subblock_y][subblock_x]
+ short mv[2]; //!< motion vector [list][subblock_x][subblock_y][component]
+ char ref_idx; //!< reference picture [list][subblock_y][subblock_x]
+ byte mb_field; //!< field macroblock indicator
+ byte field_frame; //!< indicates if co_located is field or frame.
+} PicMotionParams2;
+
+//! Macroblock
+typedef struct macroblock
+{
+ struct slice *p_Slice; //!< pointer to the current slice
+ struct img_par *p_Vid; //!< pointer to VideoParameters
+ struct inp_par *p_Inp;
+ int mbAddrX; //!< current MB address
+ int mb_x;
+ int mb_y;
+ int block_x;
+ int block_y;
+ int block_y_aff;
+ int pix_x;
+ int pix_y;
+ int pix_c_x;
+ int pix_c_y;
+
+ int subblock_x;
+ int subblock_y;
+
+ int qp; //!< QP luma
+ int qpc[2]; //!< QP chroma
+ int qp_scaled[MAX_PLANE]; //!< QP scaled for all comps.
+ Boolean is_lossless;
+ Boolean is_intra_block;
+ Boolean is_v_block;
+
+ short slice_nr;
+ short delta_quant; //!< for rate control
+
+ struct macroblock *mb_up; //!< pointer to neighboring MB (CABAC)
+ struct macroblock *mb_left; //!< pointer to neighboring MB (CABAC)
+
+ // some storage of macroblock syntax elements for global access
+ int mb_type;
+ short mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2]; //!< indices correspond to [forw,backw][block_y][block_x][x,y]
+ int cbp;
+ int64 cbp_blk [3];
+ int64 cbp_bits [3];
+ int64 cbp_bits_8x8[3];
+
+ int i16mode;
+ char b8mode[4];
+ char b8pdir[4];
+ char ei_flag; //!< error indicator flag that enables concealment
+ char dpl_flag; //!< error indicator flag that signals a missing data partition
+ char ipmode_DPCM;
+
+ short DFDisableIdc;
+ short DFAlphaC0Offset;
+ short DFBetaOffset;
+
+ char c_ipred_mode; //!< chroma intra prediction mode
+ Boolean mb_field;
+
+ int skip_flag;
+
+ int mb_addr_left, mb_addr_up, mb_addr_upper_right, mb_addr_upper_left;
+ Boolean mb_avail_left, mb_avail_up, mb_avail_upper_right, mb_avail_upper_left;
+
+ Boolean luma_transform_size_8x8_flag;
+ Boolean NoMbPartLessThan8x8Flag;
+
+ void (*itrans_8x8)(struct macroblock *currMB, ColorPlane pl, int ioff, int joff);
+
+ void (*GetMVPredictor) (struct macroblock *currMB, PixelPos *block,
+ short pmv[2], short ref_frame, struct pic_motion **motion, int mb_x, int mb_y, int blockshape_x, int blockshape_y);
+
+ int (*read_and_store_CBP_block_bit) (struct macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type);
+ char (*readRefPictureIdx) (struct syntaxelement *currSE, struct datapartition *dP, int list);
+
+} Macroblock;
+
+//! Syntaxelement
+typedef struct syntaxelement
+{
+ int value1; //!< numerical value of syntax element
+ int value2; //!< for blocked symbols, e.g. run/level
+ int len; //!< length of code
+ //int inf; //!< info part of CAVLC code
+
+#if TRACE
+ #define TRACESTRING_SIZE 100 //!< size of trace string
+ char tracestring[TRACESTRING_SIZE]; //!< trace string
+#endif
+
+ //! for mapping of CAVLC to syntaxElement
+ void (*mapping)(int len, int info, int *value1, int *value2);
+} SyntaxElement;
+
+
+//! Bitstream
+typedef struct
+{
+ // CABAC Decoding
+ int read_len; //!< actual position in the codebuffer, CABAC only
+ int code_len; //!< overall codebuffer length, CABAC only
+ // CAVLC Decoding
+ int frame_bitoffset; //!< actual position in the codebuffer, bit-oriented, CAVLC only
+ int bitstream_length; //!< overall codebuffer length, byte-oriented, CAVLC only
+
+ byte *streamBuffer; //!< actual codebuffer for read bytes
+} Bitstream;
+
+
+/* === 4x4 block typedefs === */
+// 32 bit precision
+typedef int h264_int_block_row_t[BLOCK_SIZE];
+typedef h264_int_block_row_t h264_int_block_t[BLOCK_SIZE];
+// 16 bit precision
+typedef int16_t h264_short_block_row_t[BLOCK_SIZE];
+typedef h264_short_block_row_t h264_short_block_t[BLOCK_SIZE];
+// 8 bit precision
+
+/* === 8x8 block typedefs === */
+// 32 bit precision
+typedef int h264_int_8x8block_row_t[BLOCK_SIZE_8x8];
+typedef h264_int_8x8block_row_t h264_int_8x8block_t[BLOCK_SIZE_8x8];
+// 16 bit precision
+typedef int16_t h264_short_8x8block_row_t[BLOCK_SIZE_8x8];
+typedef h264_short_8x8block_row_t h264_short_8x8block_t[BLOCK_SIZE_8x8];
+// 8 bit precision
+typedef imgpel h264_imgpel_8x8block_row_t[BLOCK_SIZE_8x8];
+typedef h264_imgpel_8x8block_row_t h264_imgpel_8x8block_t[BLOCK_SIZE_8x8];
+
+/* === 16x16 block typedefs === */
+// 32 bit precision
+typedef int h264_int_macroblock_row_t[MB_BLOCK_SIZE];
+typedef h264_int_macroblock_row_t h264_int_macroblock_t[MB_BLOCK_SIZE];
+// 16 bit precision
+typedef int16_t h264_short_macroblock_row_t[MB_BLOCK_SIZE];
+typedef h264_short_macroblock_row_t h264_short_macroblock_t[MB_BLOCK_SIZE];
+// 8 bit precision
+typedef imgpel h264_imgpel_macroblock_row_t[MB_BLOCK_SIZE];
+typedef h264_imgpel_macroblock_row_t h264_imgpel_macroblock_t[MB_BLOCK_SIZE];
+
+
+
+
+typedef int h264_pic_position[2];
+typedef byte h264_4x4_byte[BLOCK_SIZE][BLOCK_SIZE];
+typedef h264_4x4_byte h264_nz_coefficient[3];
+
+//! DataPartition
+typedef struct datapartition
+{
+
+ Bitstream *bitstream;
+ DecodingEnvironment de_cabac;
+
+} DataPartition;
+
+//! Slice
+typedef struct slice
+{
+ struct img_par *p_Vid;
+ struct inp_par *p_Inp;
+ pic_parameter_set_rbsp_t *active_pps;
+ seq_parameter_set_rbsp_t *active_sps;
+
+ struct colocated_params *p_colocated;
+ struct colocated_params *Co_located_JV[MAX_PLANE]; //!< p_colocated to be used during 4:4:4 independent mode decoding
+
+ int mb_aff_frame_flag;
+ int direct_spatial_mv_pred_flag; //!< Indicator for direct mode type (1 for Spatial, 0 for Temporal)
+ int num_ref_idx_l0_active; //!< number of available list 0 references
+ int num_ref_idx_l1_active; //!< number of available list 1 references
+
+ int qp;
+ int slice_qp_delta;
+ int qs;
+ int slice_qs_delta;
+ int slice_type; //!< slice type
+ int model_number; //!< cabac model number
+ PictureStructure structure; //!< Identify picture structure type
+ int start_mb_nr; //!< MUST be set by NAL even in case of ei_flag == 1
+ int max_part_nr;
+ int dp_mode; //!< data partitioning mode
+ int last_dquant;
+
+ // int last_mb_nr; //!< only valid when entropy coding == CABAC
+ DataPartition *partArr; //!< array of partitions
+ MotionInfoContexts *mot_ctx; //!< pointer to struct of context models for use in CABAC
+ TextureInfoContexts *tex_ctx; //!< pointer to struct of context models for use in CABAC
+
+ int mvscale[6][MAX_REFERENCE_PICTURES];
+
+ int ref_pic_list_reordering_flag_l0;
+ int *reordering_of_pic_nums_idc_l0;
+ int *abs_diff_pic_num_minus1_l0;
+ int *long_term_pic_idx_l0;
+ int ref_pic_list_reordering_flag_l1;
+ int *reordering_of_pic_nums_idc_l1;
+ int *abs_diff_pic_num_minus1_l1;
+ int *long_term_pic_idx_l1;
+
+
+ short DFDisableIdc; //!< Disable deblocking filter on slice
+ short DFAlphaC0Offset; //!< Alpha and C0 offset for filtering slice
+ short DFBetaOffset; //!< Beta offset for filtering slice
+
+ int pic_parameter_set_id; //!<the ID of the picture parameter set the slice is referring to
+
+ int dpB_NotPresent; //!< non-zero, if data partition B is lost
+ int dpC_NotPresent; //!< non-zero, if data partition C is lost
+
+
+ __declspec(align(32)) h264_imgpel_macroblock_t mb_pred[MAX_PLANE];
+ __declspec(align(32)) h264_imgpel_macroblock_t mb_rec[MAX_PLANE];
+ __declspec(align(32)) union
+ {
+ __declspec(align(32)) h264_short_8x8block_t mb_rres8[MAX_PLANE][4];
+ __declspec(align(32)) h264_short_macroblock_t cof[MAX_PLANE];
+ __declspec(align(32)) h264_short_block_t cof4[MAX_PLANE][16]; // TODO: get this to work, one of these days
+ __declspec(align(32)) h264_short_macroblock_t ipcm[MAX_PLANE];
+ };
+
+ int cofu[16];
+
+ // Scaling matrix info
+ int InvLevelScale4x4_Intra[3][6][4][4];
+ int InvLevelScale4x4_Inter[3][6][4][4];
+ int InvLevelScale8x8_Intra[3][6][64];
+ int InvLevelScale8x8_Inter[3][6][64];
+
+ int *qmatrix[12];
+
+ // Cabac
+ // TODO: we could optimize coefficient reading by storing the levels/runs instead of coefficients
+ int16_t coeff[64]; // one more for EOB
+ int coeff_ctr;
+ int pos;
+
+ //weighted prediction
+ unsigned int apply_weights;
+ unsigned int luma_log2_weight_denom;
+ unsigned int chroma_log2_weight_denom;
+ int wp_weight[2][MAX_REFERENCE_PICTURES][3]; // weight in [list][index][component] order
+ int wp_offset[6][MAX_REFERENCE_PICTURES][3]; // offset in [list][index][component] order
+ int wbp_weight[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; //weight in [list][fw_index][bw_index][component] order
+ int wp_round_luma;
+ int wp_round_chroma;
+
+ void (*read_CBP_and_coeffs_from_NAL) (Macroblock *currMB);
+ int (*decode_one_component ) (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, struct storable_picture *dec_picture);
+ int (*readSlice ) (struct img_par *, struct inp_par *);
+ int (*nal_startcode_follows ) (struct slice*, int );
+ void (*read_motion_info_from_NAL) (Macroblock *currMB);
+ void (*read_one_macroblock ) (Macroblock *currMB);
+ void (*interpret_mb_mode ) (Macroblock *currMB);
+ void (*compute_colocated ) (struct slice *currSlice, struct colocated_params *p, struct storable_picture **listX[6]);
+
+ void (*linfo_cbp_intra) (int len,int info,int *cbp, int *dummy);
+ void (*linfo_cbp_inter) (int len,int info,int *cbp, int *dummy);
+} Slice;
+
+//****************************** ~DM ***********************************
+
+// image parameters
+typedef struct img_par
+{
+ struct inp_par *p_Inp;
+ pic_parameter_set_rbsp_t *active_pps;
+ seq_parameter_set_rbsp_t *active_sps;
+ seq_parameter_set_rbsp_t SeqParSet[MAXSPS];
+ pic_parameter_set_rbsp_t PicParSet[MAXPPS];
+
+ struct sei_params *p_SEI;
+
+ struct old_slice_par *old_slice;
+ int number; //!< frame number
+ unsigned int current_mb_nr; // bitstream order
+ unsigned int num_dec_mb;
+ short current_slice_nr;
+ int *intra_block;
+
+ int qp; //!< quant for the current frame
+
+ int sp_switch; //!< 1 for switching sp, 0 for normal sp
+ int type; //!< image type INTER/INTRA
+ int width;
+ int height;
+ int width_cr; //!< width chroma
+ int height_cr; //!< height chroma
+ int mb_x;
+ int mb_y;
+ int block_x;
+ int block_y;
+ int pix_c_x;
+ int pix_c_y;
+
+ int allrefzero;
+
+ byte **ipredmode; //!< prediction type [90][74]
+ h264_nz_coefficient *nz_coeff;
+ int **siblock;
+ int cod_counter; //!< Current count of number of skipped macroblocks in a row
+
+ int structure; //!< Identify picture structure type
+
+ Slice *currentSlice; //!< pointer to current Slice data struct
+ Macroblock *mb_data; //!< array containing all MBs of a whole frame
+ Macroblock *mb_data_JV[MAX_PLANE]; //!< mb_data to be used for 4:4:4 independent mode
+ int colour_plane_id; //!< colour_plane_id of the current coded slice
+ int ChromaArrayType;
+
+ // For MB level frame/field coding
+ int mb_aff_frame_flag;
+
+ // for signalling to the neighbour logic that this is a deblocker call
+ int DeblockCall;
+ byte mixedModeEdgeFlag;
+
+ // picture error concealment
+ // concealment_head points to first node in list, concealment_end points to
+ // last node in list. Initialize both to NULL, meaning no nodes in list yet
+ struct concealment_node *concealment_head;
+ struct concealment_node *concealment_end;
+
+ DecRefPicMarking_t *dec_ref_pic_marking_buffer; //!< stores the memory management control operations
+
+ int num_ref_idx_l0_active; //!< number of forward reference
+ int num_ref_idx_l1_active; //!< number of backward reference
+
+ int slice_group_change_cycle;
+
+ int redundant_pic_cnt;
+
+ unsigned int pre_frame_num; //!< store the frame_num in the last decoded slice. For detecting gap in frame_num.
+ int non_conforming_stream;
+
+ // End JVT-D101
+ // POC200301: from unsigned int to int
+ int toppoc; //poc for this top field // POC200301
+ int bottompoc; //poc of bottom field of frame
+ int framepoc; //poc of this frame // POC200301
+ unsigned int frame_num; //frame_num for this frame
+ unsigned int field_pic_flag;
+ byte bottom_field_flag;
+
+ //the following is for slice header syntax elements of poc
+ // for poc mode 0.
+ unsigned int pic_order_cnt_lsb;
+ int delta_pic_order_cnt_bottom;
+ // for poc mode 1.
+ int delta_pic_order_cnt[3];
+
+ // ////////////////////////
+ // for POC mode 0:
+ signed int PrevPicOrderCntMsb;
+ unsigned int PrevPicOrderCntLsb;
+ signed int PicOrderCntMsb;
+
+ // for POC mode 1:
+ unsigned int AbsFrameNum;
+ signed int ExpectedPicOrderCnt, PicOrderCntCycleCnt, FrameNumInPicOrderCntCycle;
+ unsigned int PreviousFrameNum, FrameNumOffset;
+ int ExpectedDeltaPerPicOrderCntCycle;
+ int PreviousPOC, ThisPOC;
+ int PreviousFrameNumOffset;
+ // /////////////////////////
+
+ int idr_flag;
+ int nal_reference_idc; //!< nal_reference_idc from NAL unit
+
+ int idr_pic_id;
+
+ int MaxFrameNum;
+
+ unsigned int PicWidthInMbs;
+ unsigned int PicHeightInMapUnits;
+ unsigned int FrameHeightInMbs;
+ unsigned int PicHeightInMbs;
+ unsigned int PicSizeInMbs;
+ unsigned int FrameSizeInMbs;
+ unsigned int oldFrameSizeInMbs;
+
+ int no_output_of_prior_pics_flag;
+ int long_term_reference_flag;
+ int adaptive_ref_pic_buffering_flag;
+
+ int last_has_mmco_5;
+ int last_pic_bottom_field;
+
+ // Fidelity Range Extensions Stuff
+ short bitdepth_luma;
+ short bitdepth_chroma;
+ int bitdepth_scale[2];
+ int bitdepth_luma_qp_scale;
+ int bitdepth_chroma_qp_scale;
+ unsigned int dc_pred_value_comp[MAX_PLANE]; //!< component value for DC prediction (depends on component pel bit depth)
+ int max_pel_value_comp[MAX_PLANE]; //!< max value that one picture element (pixel) can take (depends on pic_unit_bitdepth)
+ int Transform8x8Mode;
+ int profile_idc;
+ int yuv_format;
+ int lossless_qpprime_flag;
+ int num_blk8x8_uv;
+ int num_uv_blocks;
+ int num_cdc_coeff;
+ int mb_cr_size_x;
+ int mb_cr_size_y;
+ int mb_cr_size_x_blk;
+ int mb_cr_size_y_blk;
+ int mb_size[3][2]; //!< component macroblock dimensions
+ int mb_size_blk[3][2]; //!< component macroblock dimensions
+ int mb_size_shift[3][2];
+ int subpel_x;
+ int subpel_y;
+ int shiftpel_x;
+ int shiftpel_y;
+
+ int max_vmv_r; //!< maximum vertical motion vector range in luma quarter frame pixel units for the current level_idc
+ int max_mb_vmv_r; //!< maximum vertical motion vector range in luma quarter pixel units for the current level_idc
+
+ // picture error concealment
+ int last_ref_pic_poc;
+ int ref_poc_gap;
+ int poc_gap;
+ int earlier_missing_poc;
+ unsigned int frame_to_conceal;
+ int IDR_concealment_flag;
+ int conceal_slice_type;
+
+ // random access point decoding
+ int recovery_point;
+ int recovery_point_found;
+ int recovery_frame_cnt;
+ int recovery_frame_num;
+ int recovery_poc;
+
+ int separate_colour_plane_flag;
+
+ int frame_number;
+ int init_bl_done;
+
+ // Redundant slices. Should be moved to another structure and allocated only if extended profile
+ unsigned int previous_frame_num; //!< frame number of previous slice
+ int ref_flag[17]; //!< 0: i-th previous frame is incorrect
+ //!< non-zero: i-th previous frame is correct
+ int Is_primary_correct; //!< if primary frame is correct, 0: incorrect
+ int Is_redundant_correct; //!< if redundant frame is correct, 0:incorrect
+ int redundant_slice_ref_idx; //!< reference index of redundant slice
+
+ //FILE *p_log; //!< SNR file
+ int LastAccessUnitExists;
+ int NALUCount;
+
+ Boolean global_init_done;
+
+ int *qp_per_matrix;
+ int *qp_rem_matrix;
+
+ struct frame_store *last_out_fs;
+ int pocs_in_dpb[100];
+
+
+ struct storable_picture *dec_picture;
+ struct storable_picture *dec_picture_JV[MAX_PLANE]; //!< dec_picture to be used during 4:4:4 independent mode decoding
+ struct storable_picture *no_reference_picture; //!< dummy storable picture for recovery point
+ struct storable_picture **listX[6];
+
+ // Error parameters
+ struct object_buffer *erc_object_list;
+ struct ercVariables_s *erc_errorVar;
+
+ int erc_mvperMB;
+ struct img_par *erc_img;
+ int ec_flag[SE_MAX_ELEMENTS]; //!< array to set errorconcealment
+
+ struct memory_input_struct *mem_input;
+
+ struct frame_store *out_buffer;
+
+ struct storable_picture *pending_output;
+ int pending_output_state;
+ int recovery_flag;
+
+ // dpb
+ struct decoded_picture_buffer *p_Dpb;
+
+ char listXsize[6];
+ // report
+ char cslice_type[9];
+ // FMO
+ int *MbToSliceGroupMap;
+ int *MapUnitToSliceGroupMap;
+ int NumberOfSliceGroups; // the number of slice groups -1 (0 == scan order, 7 == maximum)
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+ struct tone_mapping_struct_s *seiToneMapping;
+#endif
+
+ // benski> buffer of storable pictures ready for output.
+ // might be able to optimize a tad by making a ringbuffer, but i doubt it matters
+ struct storable_picture **out_pictures;
+ size_t size_out_pictures;
+ size_t num_out_pictures;
+
+ ImageCache image_cache[2]; // [0] is luma [1] is chroma (shared for both planes)
+ MotionCache motion_cache;
+
+ h264_pic_position *PicPos; //! Helper array to access macroblock positions.
+
+ NALU_t *nalu; // a cache so we don't re-alloc every time
+
+ void (*getNeighbour) (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+ void (*getNeighbourPX_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+ void (*getNeighbourXP_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
+ void (*getNeighbourLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+ void (*getNeighbourPXLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+ void (*getNeighbourXPLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+ void (*getNeighbourLeftLuma)(const Macroblock *currMB, PixelPos *pix);
+ void (*getNeighbourNXLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN<0, yN full range
+ void (*getNeighbourLeft) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN<0, yN=0
+ void (*getNeighbourUp) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN=0, yN<0
+ void (*getNeighbourNX) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN full range
+ void (*getNeighbourNP) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0
+ void (*getNeighbourNPChromaNB)(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0
+ void (*getNeighbour0X) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN=0, yN full range
+ void (*getNeighbour0XLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN=0, yN full range
+ void (*getNeighbourX0) (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix); // xN full range, yN = 0
+ void (*getNeighbourUpLuma) (const Macroblock *currMB, PixelPos *pix); // xN=0, yN<0
+ void (*getNeighbourNPLumaNB)(const Macroblock *currMB, int yN, PixelPos *pix);
+ void (*getNeighbourPXLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+ void (*getNeighbourPXLumaNB_NoPos)(const Macroblock *currMB, int yN, PixelPos *pix);
+ void (*getNeighbourPPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+ void (*getNeighbourXPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+ void (*getNeighbourXPLumaNB_NoPos)(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
+ void (*get_mb_block_pos) (const h264_pic_position *PicPos, int mb_addr, short *x, short *y);
+ void (*GetStrength) (byte Strength[16], Macroblock *MbQ, int dir,int edge, int mvlimit, struct storable_picture *p);
+ void (*EdgeLoopLuma) (ColorPlane pl, struct video_image *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, struct storable_picture *p);
+ void (*EdgeLoopChroma) (struct video_image *image, byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, struct storable_picture *p);
+} VideoParameters;
+
+// input parameters from configuration file; the HRD fields exist only when _LEAKYBUCKET_ is defined, so the struct layout depends on that macro
+typedef struct inp_par
+{
+ int intra_profile_deblocking; //!< Loop filter usage determined by flags and parameters in bitstream
+
+ // Output sequence format related variables
+ FrameFormat output; //!< output related information
+
+#ifdef _LEAKYBUCKET_
+ unsigned long R_decoder; //!< Decoder Rate in HRD Model
+ unsigned long B_decoder; //!< Decoder Buffer size in HRD model
+ unsigned long F_decoder; //!< Decoder Initial buffer fullness in HRD model
+ char LeakyBucketParamFile[FILE_NAME_SIZE]; //!< LeakyBucketParamFile (char buffer of FILE_NAME_SIZE bytes)
+#endif
+
+ // picture error concealment
+ int ref_poc_gap; //!< NOTE(review): VideoParameters has a field of the same name; which copy is authoritative is not visible here
+ int poc_gap; //!< NOTE(review): likewise duplicated in VideoParameters - confirm how the two are kept in sync
+} InputParameters;
+
+typedef struct old_slice_par //!< presumably the previous slice's header values; passed to is_new_picture() and init_old_slice() (image.h)
+{
+ unsigned field_pic_flag;
+ unsigned frame_num;
+ int nal_ref_idc;
+ unsigned pic_oder_cnt_lsb; //!< sic: "oder" is part of the identifier (compare delta_pic_order_cnt below); do not respell without updating all users
+ int delta_pic_oder_cnt_bottom; //!< sic: same "oder" spelling as above
+ int delta_pic_order_cnt[2];
+ byte bottom_field_flag;
+ byte idr_flag;
+ int idr_pic_id;
+ int pps_id;
+} OldSliceParams;
+
+typedef struct decoder_params //!< bundles pointers to the input and video parameter structures
+{
+ InputParameters *p_Inp; //!< Input Parameters
+ VideoParameters *p_Vid; //!< Image Parameters (the VideoParameters structure)
+
+} DecoderParams;
+
+#ifdef TRACE
+extern FILE *p_trace; //!< Trace file
+extern int bitcounter; //!< declared only when TRACE is defined
+#endif
+
+// prototypes (declarations only; the definitions are not part of this header)
+
+extern void error(char *text, int code); // note: text is a non-const char *
+
+// dynamic mem allocation
+extern int init_global_buffers(VideoParameters *p_Vid);
+extern void free_global_buffers(VideoParameters *p_Vid);
+
+extern int RBSPtoSODB(byte *streamBuffer, int last_byte_pos);
+extern int EBSPtoRBSP(byte *streamBuffer, int end_bytepos);
+
+void FreePartition (DataPartition *dp, int n); // counterpart of AllocPartition; n is presumably the partition count - confirm
+DataPartition *AllocPartition(int n);
+
+void tracebits(const char *trace_str, int len, int info,int value1); // NOTE(review): declared unconditionally, unlike p_trace/bitcounter which require TRACE
+void tracebits2(const char *trace_str, int len, int info);
+
+unsigned CeilLog2 ( unsigned uiVal);
+unsigned CeilLog2_sf( unsigned uiVal); // NOTE(review): how the _sf variant differs from CeilLog2 is not visible here
+
+// For 4:4:4 independent mode
+extern void change_plane_JV( VideoParameters *p_Vid, int nplane );
+extern void make_frame_picture_JV(VideoParameters *p_Vid);
+
+
+#endif
+
+
diff --git a/Src/h264dec/ldecod/inc/header.h b/Src/h264dec/ldecod/inc/header.h
new file mode 100644
index 00000000..f3185b07
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/header.h
@@ -0,0 +1,22 @@
+/*!
+ *************************************************************************************
+ * \file header.h
+ *
+ * \brief
+ * Prototypes for header.c
+ *************************************************************************************
+ */
+
+#ifndef _HEADER_H_
+#define _HEADER_H_
+
+extern void FirstPartOfSliceHeader(Slice *currSlice);
+extern void RestOfSliceHeader (Slice *currSlice);
+// NOTE(review): this header has no #include of its own; Slice, VideoParameters and Bitstream must already be declared by the includer.
+extern void dec_ref_pic_marking(VideoParameters *p_Vid, Bitstream *currStream);
+
+extern void decode_poc(VideoParameters *p_Vid);
+extern int dumppoc(VideoParameters *p_Vid);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/image.h b/Src/h264dec/ldecod/inc/image.h
new file mode 100644
index 00000000..2540f3c5
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/image.h
@@ -0,0 +1,33 @@
+
+/*!
+ ************************************************************************
+ * \file image.h
+ *
+ * \brief
+ * prototypes for image.c
+ *
+ ************************************************************************
+ */
+
+#ifndef _IMAGE_H_
+#define _IMAGE_H_
+
+#include "mbuffer.h"
+
+extern int picture_order(VideoParameters *p_Vid);
+
+extern void decode_one_slice (Slice *currSlice);
+
+extern void exit_picture(VideoParameters *p_Vid, StorablePicture **dec_picture); // takes the picture pointer by address; presumably so the caller's pointer can be updated - confirm
+extern int decode_one_frame(VideoParameters *p_Vid, uint64_t time_code); // StorablePicture (mbuffer.h) carries a uint64_t time_code field; presumably this value is stored there - confirm
+
+extern int is_new_picture(StorablePicture *dec_picture, Slice *currSlice, OldSliceParams *p_old_slice);
+extern void init_old_slice(OldSliceParams *p_old_slice);
+// For 4:4:4 independent mode
+extern void copy_dec_picture_JV( VideoParameters *p_Vid, StorablePicture *dst, StorablePicture *src );
+
+extern void frame_postprocessing(VideoParameters *p_Vid);
+extern void field_postprocessing(VideoParameters *p_Vid);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/intra16x16_pred.h b/Src/h264dec/ldecod/inc/intra16x16_pred.h
new file mode 100644
index 00000000..6680280a
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/intra16x16_pred.h
@@ -0,0 +1,25 @@
+/*!
+ *************************************************************************************
+ * \file intra16x16_pred.h
+ *
+ * \brief
+ * definitions for intra 16x16 prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright,
+ * address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *************************************************************************************
+ */
+
+#ifndef _INTRA16x16_PRED_H_
+#define _INTRA16x16_PRED_H_
+
+#include "global.h"
+#include "mbuffer.h"
+
+extern int intrapred16x16(Macroblock *currMB, ColorPlane pl, int b8); // NOTE(review): meaning of b8 and of the int result is not visible in this header - see the definition
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/intra4x4_pred.h b/Src/h264dec/ldecod/inc/intra4x4_pred.h
new file mode 100644
index 00000000..a50c5262
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/intra4x4_pred.h
@@ -0,0 +1,25 @@
+/*!
+ *************************************************************************************
+ * \file intra4x4_pred.h
+ *
+ * \brief
+ * definitions for intra 4x4 prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright,
+ * address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *************************************************************************************
+ */
+
+#ifndef _INTRA4x4_PRED_H_
+#define _INTRA4x4_PRED_H_
+
+#include "global.h"
+#include "mbuffer.h"
+
+extern int intrapred(Macroblock *currMB, ColorPlane pl, int ioff, int joff, int img_block_x, int img_block_y); // 4x4 intra predictor per the file header; NOTE(review): units of ioff/joff vs. img_block_x/y are not visible here
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/intra8x8_pred.h b/Src/h264dec/ldecod/inc/intra8x8_pred.h
new file mode 100644
index 00000000..ff238460
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/intra8x8_pred.h
@@ -0,0 +1,25 @@
+/*!
+ *************************************************************************************
+ * \file intra8x8_pred.h
+ *
+ * \brief
+ * definitions for intra 8x8 prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright,
+ * address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *************************************************************************************
+ */
+
+#ifndef _INTRA8x8_PRED_H_
+#define _INTRA8x8_PRED_H_
+
+#include "global.h"
+#include "mbuffer.h"
+
+extern int intrapred8x8(Macroblock *currMB, ColorPlane pl, int ioff, int joff); // 8x8 intra predictor per the file header; NOTE(review): meaning of the int result is not visible here
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/leaky_bucket.h b/Src/h264dec/ldecod/inc/leaky_bucket.h
new file mode 100644
index 00000000..ad605a6a
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/leaky_bucket.h
@@ -0,0 +1,26 @@
+
+/*!
+ *************************************************************************************
+ * \file leaky_bucket.h
+ *
+ * \brief
+ * Header for Leaky Buffer parameters
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Shankar Regunathan <shanre@microsoft.com>
+ *************************************************************************************
+ */
+#ifndef _LEAKY_BUCKET_H_
+#define _LEAKY_BUCKET_H_
+
+#include "global.h"
+
+#ifdef _LEAKYBUCKET_
+// Leaky Bucket functions (declared only with _LEAKYBUCKET_, the same macro that adds the HRD fields to InputParameters)
+unsigned long GetBigDoubleWord(FILE *fp); // uses FILE; this header includes only global.h, so <stdio.h> presumably arrives through it - confirm
+void calc_buffer(InputParameters *p_Inp);
+#endif
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/loopfilter.h b/Src/h264dec/ldecod/inc/loopfilter.h
new file mode 100644
index 00000000..c8b739fa
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/loopfilter.h
@@ -0,0 +1,23 @@
+/*!
+ ************************************************************************
+ * \file
+ * loopfilter.h
+ * \brief
+ * external deblocking filter interface
+ ************************************************************************
+ */
+
+#ifndef _LOOPFILTER_H_
+#define _LOOPFILTER_H_
+
+#include "global.h"
+#include "mbuffer.h"
+
+extern void DeblockPicture(VideoParameters *p_Vid, StorablePicture *p) ;
+
+// The _Horiz variants below take no 'dir' argument, unlike the EdgeLoopLuma/EdgeLoopChroma function pointers in VideoParameters; the luma ones size Strength with MB_BLOCK_SIZE, the chroma one with a literal 16.
+extern void EdgeLoopLumaNormal_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p);
+extern void EdgeLoopLumaNormal_Horiz_sse2(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p);
+extern void EdgeLoopChromaNormal_Horiz(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p);
+
+#endif //_LOOPFILTER_H_
diff --git a/Src/h264dec/ldecod/inc/macroblock.h b/Src/h264dec/ldecod/inc/macroblock.h
new file mode 100644
index 00000000..d11547a4
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/macroblock.h
@@ -0,0 +1,177 @@
+/*!
+ ************************************************************************
+ * \file macroblock.h
+ *
+ * \brief
+ * Arrays for macroblock encoding
+ *
+ * \author
+ * Inge Lille-Langoy <inge.lille-langoy@telenor.com>
+ * Copyright (C) 1999 Telenor Satellite Services, Norway
+ ************************************************************************
+ */
+
+#ifndef _MACROBLOCK_H_
+#define _MACROBLOCK_H_
+
+#include "global.h"
+#include "mbuffer.h"
+#include "block.h"
+
+//! single scan pattern: 16 coordinate pairs (each value 0..3) visiting every position of a 4x4 block exactly once in zig-zag order
+static const byte SNGL_SCAN[16][2] =
+{
+ {0,0},{1,0},{0,1},{0,2},
+ {1,1},{2,0},{3,0},{2,1},
+ {1,2},{0,3},{1,3},{2,2},
+ {3,1},{3,2},{2,3},{3,3}
+};
+
+static const byte SNGL_SCAN_1D[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}; //!< same order flattened: SNGL_SCAN_1D[k] == SNGL_SCAN[k][0] + 4*SNGL_SCAN[k][1]
+static const byte SNGL_SCAN_DC[16] = { 0, 1, 2, 8, 3, 4, 5, 6, 9, 10, 11, 12, 7, 13, 14, 15}; //!< a different permutation of 0..15; NOTE(review): its index convention is not visible here
+//! field scan pattern: 16 coordinate pairs (each value 0..3) covering every position of a 4x4 block exactly once
+static const byte FIELD_SCAN[16][2] =
+{
+ {0,0},{0,1},{1,0},{0,2},
+ {0,3},{1,1},{1,2},{1,3},
+ {2,0},{2,1},{2,2},{2,3},
+ {3,0},{3,1},{3,2},{3,3}
+};
+
+static const byte FIELD_SCAN_1D[16] = {0, 4, 1, 8, 12, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; //!< same order flattened: FIELD_SCAN_1D[k] == FIELD_SCAN[k][0] + 4*FIELD_SCAN[k][1]
+static const byte FIELD_SCAN_DC[16] = {0, 2, 1, 8, 10, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15}; //!< a different permutation of 0..15; NOTE(review): its index convention is not visible here
+//! used to control block sizes : Not used/16x16/16x8/8x16/8x8/8x4/4x8/4x4 (each entry is {width/4, height/4} of the listed partition)
+static const int BLOCK_STEP[8][2]=
+{
+ {0,0},{4,4},{4,2},{2,4},{2,2},{2,1},{1,2},{1,1}
+};
+
+//! single scan pattern for 8x8 blocks: 64 coordinate pairs (each value 0..7) visiting every position exactly once in zig-zag order
+static const byte SNGL_SCAN8x8[64][2] = {
+ {0,0}, {1,0}, {0,1}, {0,2}, {1,1}, {2,0}, {3,0}, {2,1}, {1,2}, {0,3}, {0,4}, {1,3}, {2,2}, {3,1}, {4,0}, {5,0},
+ {4,1}, {3,2}, {2,3}, {1,4}, {0,5}, {0,6}, {1,5}, {2,4}, {3,3}, {4,2}, {5,1}, {6,0}, {7,0}, {6,1}, {5,2}, {4,3},
+ {3,4}, {2,5}, {1,6}, {0,7}, {1,7}, {2,6}, {3,5}, {4,4}, {5,3}, {6,2}, {7,1}, {7,2}, {6,3}, {5,4}, {4,5}, {3,6},
+ {2,7}, {3,7}, {4,6}, {5,5}, {6,4}, {7,3}, {7,4}, {6,5}, {5,6}, {4,7}, {5,7}, {6,6}, {7,5}, {7,6}, {6,7}, {7,7}
+};
+
+static const byte SNGL_SCAN8x8_1D[64] = //!< same order flattened: SNGL_SCAN8x8_1D[k] == SNGL_SCAN8x8[k][0] + 8*SNGL_SCAN8x8[k][1]
+{
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+//! field scan pattern for 8x8 blocks: 64 coordinate pairs (each value 0..7) covering every position exactly once
+static const byte FIELD_SCAN8x8[64][2] = { // 8x8
+ {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2}, {2,0}, {1,3}, {0,5}, {0,6}, {0,7}, {1,4}, {2,1}, {3,0},
+ {2,2}, {1,5}, {1,6}, {1,7}, {2,3}, {3,1}, {4,0}, {3,2}, {2,4}, {2,5}, {2,6}, {2,7}, {3,3}, {4,1}, {5,0}, {4,2},
+ {3,4}, {3,5}, {3,6}, {3,7}, {4,3}, {5,1}, {6,0}, {5,2}, {4,4}, {4,5}, {4,6}, {4,7}, {5,3}, {6,1}, {6,2}, {5,4},
+ {5,5}, {5,6}, {5,7}, {6,3}, {7,0}, {7,1}, {6,4}, {6,5}, {6,6}, {6,7}, {7,2}, {7,3}, {7,4}, {7,5}, {7,6}, {7,7}
+};
+
+static const byte FIELD_SCAN8x8_1D[64] = //!< same order flattened: FIELD_SCAN8x8_1D[k] == FIELD_SCAN8x8[k][0] + 8*FIELD_SCAN8x8[k][1]
+{
+ 0, 8, 16, 1, 9, 24, 32, 17,
+ 2, 25, 40, 48, 56, 33, 10, 3,
+ 18, 41, 49, 57, 26, 11, 4, 19,
+ 34, 42, 50, 58, 27, 12, 5, 20,
+ 35, 43, 51, 59, 28, 13, 6, 21,
+ 36, 44, 52, 60, 29, 14, 22, 37,
+ 45, 53, 61, 30, 7, 15, 38, 46,
+ 54, 62, 23, 31, 39, 47, 55, 63
+};
+//! scan pattern over a 2x4 array: 8 pairs, first value 0..1, second value 0..3, each position once (NOTE(review): the name suggests 4:2:2 chroma DC - confirm)
+static const byte SCAN_YUV422[8][2] =
+{
+ {0,0},{0,1},
+ {1,0},{0,2},
+ {0,3},{1,1},
+ {1,2},{1,3}
+};
+
+static const unsigned char cbp_blk_chroma[8][4] = //!< consecutive values 16..47: cbp_blk_chroma[i][j] == 16 + 4*i + j (presumably bit positions following 16 luma entries - confirm)
+{ {16, 17, 18, 19},
+ {20, 21, 22, 23},
+ {24, 25, 26, 27},
+ {28, 29, 30, 31},
+ {32, 33, 34, 35},
+ {36, 37, 38, 39},
+ {40, 41, 42, 43},
+ {44, 45, 46, 47}
+};
+
+static const unsigned char cofuv_blk_x[3][8][4] = //!< x offsets; sets 0/1/2 populate 2/4/8 rows with maxima 1/1/3 (presumably 4:2:0 / 4:2:2 / 4:4:4 chroma - confirm)
+{ { {0, 1, 0, 1},
+ {0, 1, 0, 1},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0} },
+
+ { {0, 1, 0, 1},
+ {0, 1, 0, 1},
+ {0, 1, 0, 1},
+ {0, 1, 0, 1},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0} },
+
+ { {0, 1, 0, 1},
+ {2, 3, 2, 3},
+ {0, 1, 0, 1},
+ {2, 3, 2, 3},
+ {0, 1, 0, 1},
+ {2, 3, 2, 3},
+ {0, 1, 0, 1},
+ {2, 3, 2, 3} }
+};
+
+static const unsigned char cofuv_blk_y[3][8][4] = //!< y offsets, same indexing as cofuv_blk_x; sets 0/1/2 populate 2/4/8 rows with maxima 1/3/3 (presumably 4:2:0 / 4:2:2 / 4:4:4 chroma - confirm)
+{
+ { { 0, 0, 1, 1},
+ { 0, 0, 1, 1},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0} },
+
+ { { 0, 0, 1, 1},
+ { 2, 2, 3, 3},
+ { 0, 0, 1, 1},
+ { 2, 2, 3, 3},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 0} },
+
+ { { 0, 0, 1, 1},
+ { 0, 0, 1, 1},
+ { 2, 2, 3, 3},
+ { 2, 2, 3, 3},
+ { 0, 0, 1, 1},
+ { 0, 0, 1, 1},
+ { 2, 2, 3, 3},
+ { 2, 2, 3, 3}}
+};
+
+
+extern void setup_slice_methods(Slice *currSlice);
+extern void get_neighbors(Macroblock *currMB, PixelPos *block, int mb_x, int mb_y, int blockshape_x);
+extern void get_neighbors0016(Macroblock *currMB, PixelPos *block);
+// Macroblock entry points: start_macroblock receives Macroblock ** (presumably to set the caller's pointer - confirm); exit_macroblock returns a Boolean.
+extern void start_macroblock (Slice *currSlice, Macroblock **currMB);
+extern int decode_one_macroblock(Macroblock *currMB, StorablePicture *dec_picture);
+extern Boolean exit_macroblock (Slice *currSlice, int eos_bit);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/mb_prediction.h b/Src/h264dec/ldecod/inc/mb_prediction.h
new file mode 100644
index 00000000..cea3bd45
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/mb_prediction.h
@@ -0,0 +1,33 @@
+
+/*!
+ *************************************************************************************
+ * \file mb_prediction.h
+ *
+ * \brief
+ * Functions for macroblock prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *************************************************************************************
+ */
+
+#ifndef _MB_PREDICTION_H_
+#define _MB_PREDICTION_H_
+
+extern int mb_pred_intra4x4 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern int mb_pred_intra16x16 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern int mb_pred_intra8x8 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+// The three intra predictors return int; the skip/inter/direct/ipcm predictors return void. NOTE(review): this header includes nothing, so Macroblock, ColorPlane and StorablePicture must already be declared by the includer.
+extern void mb_pred_skip (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_sp_skip (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_p_inter8x8 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_p_inter16x16(Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_p_inter16x8 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_p_inter8x16 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_b_dspatial (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_b_dtemporal (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_b_inter8x8 (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+extern void mb_pred_ipcm (Macroblock *currMB);
+
+#endif
diff --git a/Src/h264dec/ldecod/inc/mbuffer.h b/Src/h264dec/ldecod/inc/mbuffer.h
new file mode 100644
index 00000000..c06e9fb0
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/mbuffer.h
@@ -0,0 +1,235 @@
+
+/*!
+ ***********************************************************************
+ * \file
+ * mbuffer.h
+ *
+ * \brief
+ * Frame buffer functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Sühring <suehring@hhi.de>
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+
+ * - Jill Boyce <jill.boyce@thomson.net>
+ * - Saurav K Bandyopadhyay <saurav@ieee.org>
+ * - Zhenyu Wu <Zhenyu.Wu@thomson.net>
+ * - Purvin Pandit <Purvin.Pandit@thomson.net>
+ *
+ ***********************************************************************
+ */
+#ifndef _MBUFFER_H_
+#define _MBUFFER_H_
+
+#include "global.h"
+#include <bfc/platform/types.h>
+
+#define MAX_LIST_SIZE 33 //!< innermost dimension of the *_ref_pic_num arrays in StorablePicture
+//! definition of pic motion parameters (embedded by value in StorablePicture as 'motion' and 'JVmotion[]')
+
+typedef struct pic_motion_params
+{
+ PicMotion **motion[2]; //!< two PicMotion tables; presumably one per reference list - confirm
+ h264_ref_t ***field_references;
+ byte * mb_field; //!< field macroblock indicator
+ byte ** field_frame; //!< indicates if co_located is field or frame.
+ int padding[1]; //!< NOTE(review): explicit padding word; its purpose (size/alignment) is not stated here
+} PicMotionParams;
+
+typedef struct video_image //!< one image plane; StorablePicture holds pointers to these for Y, U and V
+{
+ imgpel **img; //!< pointer table into the samples; presumably one pointer per row - confirm
+ imgpel *base_address; //!< presumably the start of the underlying sample storage - confirm
+ size_t stride; //!< NOTE(review): units (bytes vs. samples) are not visible here
+ struct video_image *next; // for the memory cacher
+} VideoImage;
+//! definition of a picture (field or frame)
+typedef struct storable_picture
+{
+ PictureStructure structure;
+
+ int poc;
+ int top_poc;
+ int bottom_poc;
+ int frame_poc;
+ h264_ref_t ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE]; //!< presumably indexed [slice][list][reference index] - confirm
+ h264_ref_t frm_ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE];
+ h264_ref_t top_ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE];
+ h264_ref_t bottom_ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE];
+ unsigned frame_num;
+ unsigned recovery_frame;
+
+ int pic_num;
+ int long_term_pic_num;
+ int long_term_frame_idx;
+
+ byte is_long_term;
+ int used_for_reference;
+ int is_output;
+ int non_existing;
+
+ short max_slice_id;
+
+ int size_x, size_y, size_x_cr, size_y_cr;
+ int size_x_m1, size_y_m1, size_x_cr_m1, size_y_cr_m1; //!< presumably the sizes above minus one, cached - confirm
+ int chroma_vector_adjustment;
+ int coded_frame;
+ int mb_aff_frame_flag;
+ unsigned PicWidthInMbs;
+ unsigned PicSizeInMbs;
+
+ //imgpel ** imgY; //!< Y picture component
+ union // anonymous union: plane_images[0] overlays imgY and plane_images[1..2] overlay imgUV[0..1]
+ {
+ VideoImage *plane_images[3]; // to ensure array alignment
+ struct // anonymous struct: members are accessed directly as pic->imgY / pic->imgUV
+ {
+ VideoImage *imgY;
+ VideoImage *imgUV[2]; //!< U and V picture components
+ };
+ };
+
+ struct pic_motion_params motion; //!< Motion info
+ struct pic_motion_params JVmotion[MAX_PLANE]; //!< Motion info for 4:4:4 independent mode decoding
+
+ short ** slice_id; //!< reference picture [mb_x][mb_y]
+
+ struct storable_picture *top_field; // for mb aff, if frame for referencing the top field
+ struct storable_picture *bottom_field; // for mb aff, if frame for referencing the bottom field
+ struct storable_picture *frame; // for mb aff, if field for referencing the combined frame
+
+ int slice_type;
+ int idr_flag;
+ int no_output_of_prior_pics_flag;
+ int long_term_reference_flag;
+ int adaptive_ref_pic_buffering_flag;
+
+ int chroma_format_idc;
+ int frame_mbs_only_flag;
+ int frame_cropping_flag;
+ int frame_cropping_rect_left_offset;
+ int frame_cropping_rect_right_offset;
+ int frame_cropping_rect_top_offset;
+ int frame_cropping_rect_bottom_offset;
+ int qp;
+ int chroma_qp_offset[2];
+ int slice_qp_delta;
+ DecRefPicMarking_t *dec_ref_pic_marking_buffer; //!< stores the memory management control operations
+
+ // picture error concealment
+ int concealed_pic; //indicates if this is a concealed picture
+
+ // variables for tone mapping
+ int seiHasTone_mapping;
+ int tone_mapping_model_id;
+ int tonemapped_bit_depth;
+ imgpel* tone_mapping_lut; //!< tone mapping look up table
+
+ int retain_count; // benski> we're going to reference count these things
+ uint64_t time_code; // user-passed timecode for this frame
+} StorablePicture;
+
+//! co-located parameters: one MotionParams each for frame, top and bottom (NOTE(review): the previous comment here was copied from storable_picture)
+typedef struct colocated_params
+{
+ int mb_adaptive_frame_field_flag;
+ int size_x, size_y;
+ byte is_long_term;
+
+ MotionParams frame;
+ MotionParams top;
+ MotionParams bottom;
+
+} ColocatedParams;
+
+//! Frame Stores for Decoded Picture Buffer
+typedef struct frame_store
+{
+ int is_used; //!< 0=empty; 1=top; 2=bottom; 3=both fields (or frame)
+ int is_reference; //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+ int is_long_term; //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used (NOTE(review): text identical to is_reference; presumably means long-term marking per field - confirm)
+ int is_orig_reference; //!< original marking by nal_ref_idc: 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+
+ int is_non_existent;
+
+ unsigned frame_num;
+ unsigned recovery_frame;
+
+ int frame_num_wrap;
+ int long_term_frame_idx;
+ int is_output;
+ int poc;
+
+ // picture error concealment
+ int concealment_reference;
+
+ StorablePicture *frame; //!< the frame picture, alongside separate top_field / bottom_field pictures
+ StorablePicture *top_field;
+ StorablePicture *bottom_field;
+
+} FrameStore;
+
+
+//! Decoded Picture Buffer (VideoParameters points at one through its p_Dpb member)
+typedef struct decoded_picture_buffer
+{
+ VideoParameters *p_Vid;
+ InputParameters *p_Inp;
+ FrameStore **fs; //!< table of FrameStore pointers; presumably 'size' slots with 'used_size' occupied - confirm
+ FrameStore **fs_ref; //!< presumably the short-term reference subset, counted by ref_frames_in_buffer - confirm
+ FrameStore **fs_ltref; //!< presumably the long-term reference subset, counted by ltref_frames_in_buffer - confirm
+ unsigned size;
+ unsigned used_size;
+ unsigned ref_frames_in_buffer;
+ unsigned ltref_frames_in_buffer;
+ int last_output_poc;
+ int max_long_term_pic_idx;
+
+ int init_done;
+ int num_ref_frames;
+
+ FrameStore *last_picture;
+} DecodedPictureBuffer;
+
+extern void init_dpb(VideoParameters *p_Vid);
+extern void free_dpb(VideoParameters *p_Vid);
+extern FrameStore* alloc_frame_store(void); // paired with free_frame_store below
+extern void free_frame_store(VideoParameters *p_Vid, FrameStore* f);
+extern StorablePicture* alloc_storable_picture(VideoParameters *p_Vid, PictureStructure type, int size_x, int size_y, int size_x_cr, int size_y_cr); // paired with free_storable_picture; NOTE(review): how StorablePicture.retain_count interacts with freeing is not visible here
+extern void free_storable_picture(VideoParameters *p_Vid, StorablePicture* p);
+extern void store_picture_in_dpb(VideoParameters *p_Vid, StorablePicture* p);
+extern void flush_dpb(VideoParameters *p_Vid);
+
+extern void dpb_split_field (VideoParameters *p_Vid, FrameStore *fs);
+extern void dpb_combine_field(VideoParameters *p_Vid, FrameStore *fs);
+extern void dpb_combine_field_yuv(VideoParameters *p_Vid, FrameStore *fs);
+
+extern void init_lists (Slice *currSlice);
+extern void reorder_ref_pic_list(VideoParameters *p_Vid, StorablePicture **list, char *list_size,
+ int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc,
+ int *abs_diff_pic_num_minus1, int *long_term_pic_idx);
+
+extern void init_mbaff_lists(VideoParameters *p_Vid);
+extern void alloc_ref_pic_list_reordering_buffer(Slice *currSlice);
+extern void free_ref_pic_list_reordering_buffer(Slice *currSlice);
+
+extern void fill_frame_num_gap(VideoParameters *p_Vid);
+
+extern ColocatedParams* alloc_colocated(VideoParameters *p_Vid, int size_x, int size_y,int mb_adaptive_frame_field_flag);
+extern void free_colocated(VideoParameters *p_Vid, ColocatedParams* p);
+extern void compute_colocated (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]);
+extern void compute_colocated_frames_mbs (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]);
+
+// For 4:4:4 independent mode
+extern void compute_colocated_JV ( Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6]);
+extern void copy_storable_param_JV( VideoParameters *p_Vid, PicMotionParams *JVplane, PicMotionParams *motion );
+
+// benski> decoded output pictures (these name the VideoParameters argument 'img' rather than 'p_Vid'; presumably they manage its out_pictures buffer - confirm)
+void out_storable_picture_get(VideoParameters *img, StorablePicture **pic);
+void out_storable_picture_add(VideoParameters *img, StorablePicture *pic);
+void out_storable_pictures_init(VideoParameters *img, size_t count);
+void out_storable_pictures_destroy(VideoParameters *img);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/mc_prediction.h b/Src/h264dec/ldecod/inc/mc_prediction.h
new file mode 100644
index 00000000..e3165e27
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/mc_prediction.h
@@ -0,0 +1,61 @@
+
+/*!
+ *************************************************************************************
+ * \file mc_prediction.h
+ *
+ * \brief
+ * definitions for motion compensated prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright,
+ * address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *************************************************************************************
+ */
+
+#ifndef _MC_PREDICTION_H_
+#define _MC_PREDICTION_H_
+
+#include "global.h"
+#include "mbuffer.h"
+
+extern void get_block_luma (Macroblock *currMB, ColorPlane pl, StorablePicture *list, int x_pos, int y_pos, const short *motion_vector, int ver_block_size, int hor_block_size, h264_imgpel_macroblock_t block);
+
+extern void intra_cr_decoding (Macroblock *currMB, int yuv);
+extern void prepare_direct_params(Macroblock *currMB, StorablePicture *dec_picture, short pmvl0[2], short pmvl1[2],char *l0_rFrame, char *l1_rFrame);
+extern void perform_mc (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int block_size_x, int block_size_y, int curr_mb_field);
+extern void perform_mc16x16 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int list_offset, int curr_mb_field);
+extern void perform_mc16x8 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field);
+extern void perform_mc8x8 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field);
+extern void perform_mc8x16 (Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field);
+// Weighted-prediction kernels: each block size is declared in three flavours, suffixed _sse2, _ipp and _c.
+void weighted_mc_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+void weighted_mc_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+void weighted_mc_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+
+void weighted_mc_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+void weighted_mc_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+void weighted_mc_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+
+void weighted_mc_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+void weighted_mc_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+void weighted_mc_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+
+void weighted_bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+void weighted_bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+void weighted_bi_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+
+void weighted_bi_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+void weighted_bi_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+void weighted_bi_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+
+void weighted_bi_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+void weighted_bi_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+void weighted_bi_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+// NOTE(review): only _sse2 and _ipp flavours of bi_prediction8x8 are declared; there is no _c declaration in this header.
+void bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0);
+void bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/meminput.h b/Src/h264dec/ldecod/inc/meminput.h
new file mode 100644
index 00000000..babaf2f0
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/meminput.h
@@ -0,0 +1,26 @@
+#ifndef _MEMINPUT_H
+#define _MEMINPUT_H
+#pragma once
+
+#include "nalucommon.h"
+#include <bfc/platform/types.h>
+
+typedef struct memory_input_struct // NOTE(review): presumably the state behind the *Memory*/mem_input functions declared below — confirm
+{
+ const uint8_t *user_buffer; // read-only byte pointer; NOTE(review): presumably caller-supplied input data — confirm ownership
+ size_t user_buffer_size; // NOTE(review): presumably the length of user_buffer in bytes — confirm
+ size_t user_buffer_read; // NOTE(review): presumably the number of bytes of user_buffer already consumed — confirm
+
+ uint8_t *Buf; // writable byte buffer; NOTE(review): allocation and lifetime are not visible in this header — confirm
+ int resetting; // NOTE(review): looks like a boolean flag — confirm
+ int skip_b_frames; // NOTE(review): looks like a flag or counter for dropping B frames — confirm
+} memory_input_t;
+
+int GetMemoryNALU (VideoParameters *p_Vid, NALU_t *nalu);
+void OpenMemory(VideoParameters *p_Vid, const char *fn);
+void CloseMemory(VideoParameters *p_Vid);
+void malloc_mem_input(VideoParameters *p_Vid);
+void free_mem_input(VideoParameters *p_Vid);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/nalu.h b/Src/h264dec/ldecod/inc/nalu.h
new file mode 100644
index 00000000..d10e6bfa
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/nalu.h
@@ -0,0 +1,28 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ * nalu.h
+ * \brief
+ * Common NALU support functions
+ *
+ * \date 25 November 2002
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+
+#ifndef _NALU_H_
+#define _NALU_H_
+
+#include "nalucommon.h"
+
+extern void initBitsFile (VideoParameters *p_Vid);
+extern void CheckZeroByteNonVCL(VideoParameters *p_Vid, NALU_t *nalu);
+extern void CheckZeroByteVCL (VideoParameters *p_Vid, NALU_t *nalu);
+
+extern int read_next_nalu(VideoParameters *p_Vid, NALU_t *nalu);
+
+#endif
diff --git a/Src/h264dec/ldecod/inc/optim.h b/Src/h264dec/ldecod/inc/optim.h
new file mode 100644
index 00000000..bfcc07f8
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/optim.h
@@ -0,0 +1,46 @@
+#pragma once
+
+typedef struct optimized_functions // table of function pointers; a single global instance named 'opt' is declared right after this struct
+{
+ //void (*itrans4x4)(const h264_short_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y);
+ void (*itrans8x8)(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x); // same signature as itrans8x8_mmx/_sse2/_c/_lossless in transform8x8.h
+
+ void (*weighted_mc_prediction16x16)(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom); // single-list weighted prediction, one slot per block size
+ void (*weighted_mc_prediction16x8)(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+ void (*weighted_mc_prediction8x8)(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom);
+
+ void (*weighted_bi_prediction16x16)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom); // same signature as the weighted_bi_prediction*_sse2/_ipp/_c prototypes
+ void (*weighted_bi_prediction16x8)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+ void (*weighted_bi_prediction8x8)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+
+ void (*bi_prediction8x8)(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0); // same signature as bi_prediction8x8_sse2/_ipp
+
+ void (*copy_image_data_16x16_stride)(struct video_image *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source);
+ int (*code_from_bitstream_2d_5_4)(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab); // 8-bit tables; same signature as the _c/_sse2 versions in vlc.h
+ int (*code_from_bitstream_2d_17_4)(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); // 16-bit tables; same signature as the _c/_sse2 versions in vlc.h
+ int (*code_from_bitstream_2d_16_1)(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab); // 16-bit tables, no SyntaxElement argument
+} OptimizedFunctions;
+
+extern OptimizedFunctions opt;
+
+/* Call-site aliases for the optimized routines. opt_itrans4x4 is bound at compile time (itrans4x4_c when _DEBUG is defined, itrans4x4_mmx otherwise); every other alias dispatches through the global 'opt' function-pointer table. Routing callers through these macros means a specific build could bind the functions directly, if we have the need. */
+#ifdef _DEBUG
+#define opt_itrans4x4 (itrans4x4_c)
+#else
+#define opt_itrans4x4 (itrans4x4_mmx)
+#endif
+#define opt_itrans8x8 (opt.itrans8x8)
+
+#define opt_weighted_mc_prediction16x16 (opt.weighted_mc_prediction16x16)
+#define opt_weighted_mc_prediction16x8 (opt.weighted_mc_prediction16x8)
+#define opt_weighted_mc_prediction8x8 (opt.weighted_mc_prediction8x8)
+
+#define opt_weighted_bi_prediction16x16 (opt.weighted_bi_prediction16x16)
+#define opt_weighted_bi_prediction16x8 (opt.weighted_bi_prediction16x8)
+#define opt_weighted_bi_prediction8x8 (opt.weighted_bi_prediction8x8)
+
+#define opt_bi_prediction8x8 (opt.bi_prediction8x8)
+#define opt_copy_image_data_16x16_stride (opt.copy_image_data_16x16_stride)
+#define opt_code_from_bitstream_2d_5_4 (opt.code_from_bitstream_2d_5_4)
+#define opt_code_from_bitstream_2d_17_4 (opt.code_from_bitstream_2d_17_4)
+#define opt_code_from_bitstream_2d_16_1 (opt.code_from_bitstream_2d_16_1)
\ No newline at end of file
diff --git a/Src/h264dec/ldecod/inc/output.h b/Src/h264dec/ldecod/inc/output.h
new file mode 100644
index 00000000..69e06f85
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/output.h
@@ -0,0 +1,27 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ * output.h
+ * \brief
+ * Picture writing routine headers
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Suehring <suehring@hhi.de>
+ ***************************************************************************************
+ */
+
+#ifndef _OUTPUT_H_
+#define _OUTPUT_H_
+
+
+extern void write_stored_frame(VideoParameters *p_Vid, FrameStore *fs);
+extern void direct_output (VideoParameters *p_Vid, StorablePicture *p);
+extern void init_out_buffer (VideoParameters *p_Vid);
+extern void uninit_out_buffer (VideoParameters *p_Vid);
+
+#if (PAIR_FIELDS_IN_OUTPUT)
+extern void flush_pending_output(VideoParameters *p_Vid);
+#endif
+
+#endif //_OUTPUT_H_
diff --git a/Src/h264dec/ldecod/inc/parset.h b/Src/h264dec/ldecod/inc/parset.h
new file mode 100644
index 00000000..c433c26d
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/parset.h
@@ -0,0 +1,56 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ * parset.h
+ * \brief
+ * Picture and Sequence Parameter Sets, decoder operations
+ *
+ * \date 25 November 2002
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+
+#ifndef _PARSET_H_
+#define _PARSET_H_
+
+#include "parsetcommon.h"
+#include "nalucommon.h"
+
+static const byte ZZ_SCAN[16] = // 4x4 zig-zag order: entry i is the raster index (row*4 + column) of the i-th scanned position
+{ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+static const byte ZZ_SCAN8[64] = // 8x8 zig-zag order: entry i is the raster index (row*8 + column) of the i-th scanned position
+{ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+extern void Scaling_List(int *scalingList, int sizeOfScalingList, Boolean *UseDefaultScalingMatrix, Bitstream *s);
+
+extern void InitVUI(seq_parameter_set_rbsp_t *sps);
+extern int ReadVUI(DataPartition *p, seq_parameter_set_rbsp_t *sps);
+extern int ReadHRDParameters(DataPartition *p, hrd_parameters_t *hrd);
+
+extern void PPSConsistencyCheck (pic_parameter_set_rbsp_t *pps);
+extern void SPSConsistencyCheck (seq_parameter_set_rbsp_t *sps);
+
+extern void MakePPSavailable (VideoParameters *p_Vid, int id, pic_parameter_set_rbsp_t *pps);
+extern void MakeSPSavailable (VideoParameters *p_Vid, int id, seq_parameter_set_rbsp_t *sps);
+
+extern void ProcessSPS (VideoParameters *p_Vid, NALU_t *nalu);
+extern void ProcessPPS (VideoParameters *p_Vid, NALU_t *nalu);
+
+extern void UseParameterSet (Slice *currSlice, int PicParsetId);
+
+extern void CleanUpPPS(VideoParameters *p_Vid);
+
+extern void activate_sps (VideoParameters *p_Vid, seq_parameter_set_rbsp_t *sps);
+extern void activate_pps (VideoParameters *p_Vid, pic_parameter_set_rbsp_t *pps);
+
+#endif
diff --git a/Src/h264dec/ldecod/inc/parsetcommon.h b/Src/h264dec/ldecod/inc/parsetcommon.h
new file mode 100644
index 00000000..16a64098
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/parsetcommon.h
@@ -0,0 +1,202 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ * parsetcommon.h
+ * \brief
+ * Picture and Sequence Parameter Sets, structures common to encoder and decoder
+ *
+ * \date 25 November 2002
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+
+
+// In the JVT syntax, frequently flags are used that indicate the presence of
+// certain pieces of information in the NALU. Here, these flags are also
+// present. In the encoder, those bits indicate that the values signaled to
+// be present are meaningful and that this part of the syntax should be
+// written to the NALU. In the decoder, the flag indicates that information
+// was received from the decoded NALU and should be used henceforth.
+// The structure names were chosen as indicated in the JVT syntax
+
+#ifndef _PARSETCOMMON_H_
+#define _PARSETCOMMON_H_
+
+#include "defines.h"
+
+#define MAXIMUMPARSETRBSPSIZE 1500
+#define MAXIMUMPARSETNALUSIZE 1500
+
+#define MAXSPS 32
+#define MAXPPS 256
+
+#define MAXIMUMVALUEOFcpb_cnt 32 // capacity of the three per-CPB arrays in hrd_parameters_t
+typedef struct // HRD parameters; the trailing comment on each field is its bitstream descriptor (u(n): n-bit unsigned, ue(v): unsigned Exp-Golomb)
+{
+ unsigned int cpb_cnt_minus1; // ue(v)
+ unsigned int bit_rate_scale; // u(4)
+ unsigned int cpb_size_scale; // u(4)
+ unsigned int bit_rate_value_minus1 [MAXIMUMVALUEOFcpb_cnt]; // ue(v)
+ unsigned int cpb_size_value_minus1 [MAXIMUMVALUEOFcpb_cnt]; // ue(v)
+ unsigned int cbr_flag [MAXIMUMVALUEOFcpb_cnt]; // u(1)
+ unsigned int initial_cpb_removal_delay_length_minus1; // u(5)
+ unsigned int cpb_removal_delay_length_minus1; // u(5)
+ unsigned int dpb_output_delay_length_minus1; // u(5)
+ unsigned int time_offset_length; // u(5)
+} hrd_parameters_t;
+
+
+typedef struct // VUI parameters carried inside the sequence parameter set; trailing comments give each field's bitstream descriptor
+{
+ Boolean aspect_ratio_info_present_flag; // u(1)
+ unsigned int aspect_ratio_idc; // u(8)
+ unsigned short sar_width; // u(16)
+ unsigned short sar_height; // u(16)
+ Boolean overscan_info_present_flag; // u(1)
+ Boolean overscan_appropriate_flag; // u(1)
+ Boolean video_signal_type_present_flag; // u(1)
+ unsigned int video_format; // u(3)
+ Boolean video_full_range_flag; // u(1)
+ Boolean colour_description_present_flag; // u(1)
+ unsigned int colour_primaries; // u(8)
+ unsigned int transfer_characteristics; // u(8)
+ unsigned int matrix_coefficients; // u(8)
+ Boolean chroma_location_info_present_flag; // u(1)
+ unsigned int chroma_sample_loc_type_top_field; // ue(v)
+ unsigned int chroma_sample_loc_type_bottom_field; // ue(v)
+ Boolean timing_info_present_flag; // u(1)
+ unsigned int num_units_in_tick; // u(32)
+ unsigned int time_scale; // u(32)
+ Boolean fixed_frame_rate_flag; // u(1)
+ Boolean nal_hrd_parameters_present_flag; // u(1)
+ hrd_parameters_t nal_hrd_parameters; // hrd_parameters_t
+ Boolean vcl_hrd_parameters_present_flag; // u(1)
+ hrd_parameters_t vcl_hrd_parameters; // hrd_parameters_t
+ // if( nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag )
+ Boolean low_delay_hrd_flag; // u(1)
+ Boolean pic_struct_present_flag; // u(1)
+ Boolean bitstream_restriction_flag; // u(1)
+ Boolean motion_vectors_over_pic_boundaries_flag; // u(1)
+ unsigned int max_bytes_per_pic_denom; // ue(v)
+ unsigned int max_bits_per_mb_denom; // ue(v)
+ unsigned int log2_max_mv_length_vertical; // ue(v)
+ unsigned int log2_max_mv_length_horizontal; // ue(v)
+ unsigned int num_reorder_frames; // ue(v)
+ unsigned int max_dec_frame_buffering; // ue(v)
+} vui_seq_parameters_t;
+
+
+#define MAXnum_slice_groups_minus1 8 // capacity of the per-slice-group arrays in pic_parameter_set_rbsp_t
+typedef struct // picture parameter set; trailing comments give each field's bitstream descriptor
+{
+ Boolean Valid; // indicates the parameter set is valid
+ unsigned int pic_parameter_set_id; // ue(v)
+ unsigned int seq_parameter_set_id; // ue(v)
+ Boolean entropy_coding_mode_flag; // u(1)
+ Boolean transform_8x8_mode_flag; // u(1)
+
+ Boolean pic_scaling_matrix_present_flag; // u(1)
+ int pic_scaling_list_present_flag[12]; // u(1)
+ int ScalingList4x4[6][16]; // se(v)
+ int ScalingList8x8[6][64]; // se(v)
+ Boolean UseDefaultScalingMatrix4x4Flag[6];
+ Boolean UseDefaultScalingMatrix8x8Flag[6];
+
+ // if( pic_order_cnt_type < 2 ) in the sequence parameter set
+ Boolean bottom_field_pic_order_in_frame_present_flag; // u(1)
+ unsigned int num_slice_groups_minus1; // ue(v)
+ unsigned int slice_group_map_type; // ue(v)
+ // if( slice_group_map_type == 0 )
+ unsigned int run_length_minus1[MAXnum_slice_groups_minus1]; // ue(v)
+ // else if( slice_group_map_type == 2 )
+ unsigned int top_left[MAXnum_slice_groups_minus1]; // ue(v)
+ unsigned int bottom_right[MAXnum_slice_groups_minus1]; // ue(v)
+ // else if( slice_group_map_type == 3, 4 or 5 )
+ Boolean slice_group_change_direction_flag; // u(1)
+ unsigned int slice_group_change_rate_minus1; // ue(v)
+ // else if( slice_group_map_type == 6 )
+ unsigned int pic_size_in_map_units_minus1; // ue(v)
+ byte *slice_group_id; // complete MBAmap u(v); pointer member — NOTE(review): allocation/ownership is not visible in this header (see AllocPPS/FreePPS) — confirm
+
+ int num_ref_idx_l0_active_minus1; // ue(v)
+ int num_ref_idx_l1_active_minus1; // ue(v)
+ Boolean weighted_pred_flag; // u(1)
+ unsigned int weighted_bipred_idc; // u(2)
+ int pic_init_qp_minus26; // se(v)
+ int pic_init_qs_minus26; // se(v)
+ int chroma_qp_index_offset; // se(v)
+
+ int second_chroma_qp_index_offset; // se(v)
+
+ Boolean deblocking_filter_control_present_flag; // u(1)
+ Boolean constrained_intra_pred_flag; // u(1)
+ Boolean redundant_pic_cnt_present_flag; // u(1)
+} pic_parameter_set_rbsp_t;
+
+
+#define MAXnum_ref_frames_in_pic_order_cnt_cycle 256 // capacity of offset_for_ref_frame[] in seq_parameter_set_rbsp_t
+typedef struct // sequence parameter set; trailing comments give each field's bitstream descriptor
+{
+ Boolean Valid; // indicates the parameter set is valid
+
+ unsigned int profile_idc; // u(8)
+ Boolean constrained_set0_flag; // u(1)
+ Boolean constrained_set1_flag; // u(1)
+ Boolean constrained_set2_flag; // u(1)
+ Boolean constrained_set3_flag; // u(1)
+ unsigned int level_idc; // u(8)
+ unsigned int seq_parameter_set_id; // ue(v)
+ unsigned int chroma_format_idc; // ue(v)
+
+ Boolean seq_scaling_matrix_present_flag; // u(1)
+ int seq_scaling_list_present_flag[12]; // u(1)
+ int ScalingList4x4[6][16]; // se(v)
+ int ScalingList8x8[6][64]; // se(v)
+ Boolean UseDefaultScalingMatrix4x4Flag[6];
+ Boolean UseDefaultScalingMatrix8x8Flag[6];
+
+ unsigned int bit_depth_luma_minus8; // ue(v)
+ unsigned int bit_depth_chroma_minus8; // ue(v)
+ unsigned int log2_max_frame_num_minus4; // ue(v)
+ unsigned int pic_order_cnt_type; // ue(v)
+ // if( pic_order_cnt_type == 0 )
+ unsigned int log2_max_pic_order_cnt_lsb_minus4; // ue(v)
+ // else if( pic_order_cnt_type == 1 )
+ Boolean delta_pic_order_always_zero_flag; // u(1)
+ int offset_for_non_ref_pic; // se(v)
+ int offset_for_top_to_bottom_field; // se(v)
+ unsigned int num_ref_frames_in_pic_order_cnt_cycle; // ue(v)
+ // for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
+ int offset_for_ref_frame[MAXnum_ref_frames_in_pic_order_cnt_cycle]; // se(v)
+ unsigned int num_ref_frames; // ue(v)
+ Boolean gaps_in_frame_num_value_allowed_flag; // u(1)
+ unsigned int pic_width_in_mbs_minus1; // ue(v)
+ unsigned int pic_height_in_map_units_minus1; // ue(v)
+ Boolean frame_mbs_only_flag; // u(1)
+ // if( !frame_mbs_only_flag )
+ Boolean mb_adaptive_frame_field_flag; // u(1)
+ Boolean direct_8x8_inference_flag; // u(1)
+ Boolean frame_cropping_flag; // u(1)
+ unsigned int frame_cropping_rect_left_offset; // ue(v)
+ unsigned int frame_cropping_rect_right_offset; // ue(v)
+ unsigned int frame_cropping_rect_top_offset; // ue(v)
+ unsigned int frame_cropping_rect_bottom_offset; // ue(v)
+ Boolean vui_parameters_present_flag; // u(1)
+ vui_seq_parameters_t vui_seq_parameters; // vui_seq_parameters_t
+ unsigned separate_colour_plane_flag; // u(1); declared last in this struct, not in bitstream order
+} seq_parameter_set_rbsp_t;
+
+pic_parameter_set_rbsp_t *AllocPPS (void);
+seq_parameter_set_rbsp_t *AllocSPS (void);
+
+void FreePPS (pic_parameter_set_rbsp_t *pps);
+void FreeSPS (seq_parameter_set_rbsp_t *sps);
+
+int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2);
+int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2);
+
+#endif
diff --git a/Src/h264dec/ldecod/inc/quant.h b/Src/h264dec/ldecod/inc/quant.h
new file mode 100644
index 00000000..01bde65f
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/quant.h
@@ -0,0 +1,169 @@
+
+/*!
+ ************************************************************************
+ * \file quant.h
+ *
+ * \brief
+ * definitions for quantization functions
+ *
+ * \author
+ *
+ ************************************************************************
+ */
+
+#ifndef _QUANT_H_
+#define _QUANT_H_
+
+// exported variables (all tables here are 'static const' in a header, so every translation unit that includes it gets its own copy)
+static const int dequant_coef8[6][64] = // six 64-entry tables, each written out below as 8 rows of 8 values
+{
+ {
+ 20, 19, 25, 19, 20, 19, 25, 19,
+ 19, 18, 24, 18, 19, 18, 24, 18,
+ 25, 24, 32, 24, 25, 24, 32, 24,
+ 19, 18, 24, 18, 19, 18, 24, 18,
+ 20, 19, 25, 19, 20, 19, 25, 19,
+ 19, 18, 24, 18, 19, 18, 24, 18,
+ 25, 24, 32, 24, 25, 24, 32, 24,
+ 19, 18, 24, 18, 19, 18, 24, 18
+ },
+ {
+ 22, 21, 28, 21, 22, 21, 28, 21,
+ 21, 19, 26, 19, 21, 19, 26, 19,
+ 28, 26, 35, 26, 28, 26, 35, 26,
+ 21, 19, 26, 19, 21, 19, 26, 19,
+ 22, 21, 28, 21, 22, 21, 28, 21,
+ 21, 19, 26, 19, 21, 19, 26, 19,
+ 28, 26, 35, 26, 28, 26, 35, 26,
+ 21, 19, 26, 19, 21, 19, 26, 19
+ },
+ {
+ 26, 24, 33, 24, 26, 24, 33, 24,
+ 24, 23, 31, 23, 24, 23, 31, 23,
+ 33, 31, 42, 31, 33, 31, 42, 31,
+ 24, 23, 31, 23, 24, 23, 31, 23,
+ 26, 24, 33, 24, 26, 24, 33, 24,
+ 24, 23, 31, 23, 24, 23, 31, 23,
+ 33, 31, 42, 31, 33, 31, 42, 31,
+ 24, 23, 31, 23, 24, 23, 31, 23
+ },
+ {
+ 28, 26, 35, 26, 28, 26, 35, 26,
+ 26, 25, 33, 25, 26, 25, 33, 25,
+ 35, 33, 45, 33, 35, 33, 45, 33,
+ 26, 25, 33, 25, 26, 25, 33, 25,
+ 28, 26, 35, 26, 28, 26, 35, 26,
+ 26, 25, 33, 25, 26, 25, 33, 25,
+ 35, 33, 45, 33, 35, 33, 45, 33,
+ 26, 25, 33, 25, 26, 25, 33, 25
+ },
+ {
+ 32, 30, 40, 30, 32, 30, 40, 30,
+ 30, 28, 38, 28, 30, 28, 38, 28,
+ 40, 38, 51, 38, 40, 38, 51, 38,
+ 30, 28, 38, 28, 30, 28, 38, 28,
+ 32, 30, 40, 30, 32, 30, 40, 30,
+ 30, 28, 38, 28, 30, 28, 38, 28,
+ 40, 38, 51, 38, 40, 38, 51, 38,
+ 30, 28, 38, 28, 30, 28, 38, 28
+ },
+ {
+ 36, 34, 46, 34, 36, 34, 46, 34,
+ 34, 32, 43, 32, 34, 32, 43, 32,
+ 46, 43, 58, 43, 46, 43, 58, 43,
+ 34, 32, 43, 32, 34, 32, 43, 32,
+ 36, 34, 46, 34, 36, 34, 46, 34,
+ 34, 32, 43, 32, 34, 32, 43, 32,
+ 46, 43, 58, 43, 46, 43, 58, 43,
+ 34, 32, 43, 32, 34, 32, 43, 32
+ }
+};
+
+
+//! Dequantization coefficients: six 4x4 matrices; each matrix holds three distinct values repeated with period 2 in both directions
+static const int dequant_coef[6][4][4] = {
+ {
+ { 10, 13, 10, 13},
+ { 13, 16, 13, 16},
+ { 10, 13, 10, 13},
+ { 13, 16, 13, 16}},
+ {
+ { 11, 14, 11, 14},
+ { 14, 18, 14, 18},
+ { 11, 14, 11, 14},
+ { 14, 18, 14, 18}},
+ {
+ { 13, 16, 13, 16},
+ { 16, 20, 16, 20},
+ { 13, 16, 13, 16},
+ { 16, 20, 16, 20}},
+ {
+ { 14, 18, 14, 18},
+ { 18, 23, 18, 23},
+ { 14, 18, 14, 18},
+ { 18, 23, 18, 23}},
+ {
+ { 16, 20, 16, 20},
+ { 20, 25, 20, 25},
+ { 16, 20, 16, 20},
+ { 20, 25, 20, 25}},
+ {
+ { 18, 23, 18, 23},
+ { 23, 29, 23, 29},
+ { 18, 23, 18, 23},
+ { 23, 29, 23, 29}}
+};
+
+static const int quant_coef[6][4][4] = { // six 4x4 matrices laid out like dequant_coef; NOTE(review): presumably the forward-quantization counterpart — confirm where a decoder uses it
+ {
+ { 13107, 8066, 13107, 8066},
+ { 8066, 5243, 8066, 5243},
+ { 13107, 8066, 13107, 8066},
+ { 8066, 5243, 8066, 5243}},
+ {
+ { 11916, 7490, 11916, 7490},
+ { 7490, 4660, 7490, 4660},
+ { 11916, 7490, 11916, 7490},
+ { 7490, 4660, 7490, 4660}},
+ {
+ { 10082, 6554, 10082, 6554},
+ { 6554, 4194, 6554, 4194},
+ { 10082, 6554, 10082, 6554},
+ { 6554, 4194, 6554, 4194}},
+ {
+ { 9362, 5825, 9362, 5825},
+ { 5825, 3647, 5825, 3647},
+ { 9362, 5825, 9362, 5825},
+ { 5825, 3647, 5825, 3647}},
+ {
+ { 8192, 5243, 8192, 5243},
+ { 5243, 3355, 5243, 3355},
+ { 8192, 5243, 8192, 5243},
+ { 5243, 3355, 5243, 3355}},
+ {
+ { 7282, 4559, 7282, 4559},
+ { 4559, 2893, 4559, 2893},
+ { 7282, 4559, 7282, 4559},
+ { 4559, 2893, 4559, 2893}}
+};
+
+// SP decoding parameter (EQ. 8-425); the values are identical to dequant_coef[4]
+static const int A[4][4] = {
+ { 16, 20, 16, 20},
+ { 20, 25, 20, 25},
+ { 16, 20, 16, 20},
+ { 20, 25, 20, 25}
+};
+
+// exported functions
+// quantization initialization
+extern void init_qp_process (VideoParameters *p_Vid);
+extern void free_qp_matrices(VideoParameters *p_Vid);
+
+// For Q-matrix
+extern void assign_quant_params (Slice *currslice);
+extern void CalculateQuant4x4Param(Slice *currslice);
+
+
+#endif
+
diff --git a/Src/h264dec/ldecod/inc/sei.h b/Src/h264dec/ldecod/inc/sei.h
new file mode 100644
index 00000000..943428fd
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/sei.h
@@ -0,0 +1,100 @@
+
+/*!
+ *************************************************************************************
+ * \file sei.h
+ *
+ * \brief
+ * Prototypes for sei.c
+ *************************************************************************************
+ */
+
+#ifndef SEI_H
+#define SEI_H
+
+typedef enum { // SEI message kinds; enumerators are consecutive from 0, so the declaration order fixes each numeric value
+ SEI_BUFFERING_PERIOD = 0,
+ SEI_PIC_TIMING,
+ SEI_PAN_SCAN_RECT,
+ SEI_FILLER_PAYLOAD,
+ SEI_USER_DATA_REGISTERED_ITU_T_T35,
+ SEI_USER_DATA_UNREGISTERED,
+ SEI_RECOVERY_POINT,
+ SEI_DEC_REF_PIC_MARKING_REPETITION,
+ SEI_SPARE_PIC,
+ SEI_SCENE_INFO,
+ SEI_SUB_SEQ_INFO,
+ SEI_SUB_SEQ_LAYER_CHARACTERISTICS,
+ SEI_SUB_SEQ_CHARACTERISTICS,
+ SEI_FULL_FRAME_FREEZE,
+ SEI_FULL_FRAME_FREEZE_RELEASE,
+ SEI_FULL_FRAME_SNAPSHOT,
+ SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START,
+ SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END,
+ SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET,
+ SEI_FILM_GRAIN_CHARACTERISTICS,
+ SEI_DEBLOCKING_FILTER_DISPLAY_PREFERENCE,
+ SEI_STEREO_VIDEO_INFO,
+ SEI_POST_FILTER_HINTS,
+ SEI_TONE_MAPPING,
+
+ SEI_MAX_ELEMENTS //!< number of SEI types listed above (24); keep it last
+} SEI_type;
+
+#define MAX_FN 256
+// tone mapping information: bit-depth limits; MAX_NUM_PIVOTS evaluates to 1<<12 = 4096
+#define MAX_CODED_BIT_DEPTH 12
+#define MAX_SEI_BIT_DEPTH 12
+#define MAX_NUM_PIVOTS (1<<MAX_CODED_BIT_DEPTH)
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+typedef struct tone_mapping_struct_s // tone-mapping SEI state; only compiled when ENABLE_OUTPUT_TONEMAPPING is non-zero
+{
+ Boolean seiHasTone_mapping;
+ unsigned int tone_map_repetition_period;
+ unsigned char coded_data_bit_depth;
+ unsigned char sei_bit_depth;
+ unsigned int model_id;
+ unsigned int count;
+
+ imgpel lut[1<<MAX_CODED_BIT_DEPTH]; //!< lookup table mapping a coded data value to its output data value (one entry per possible coded value)
+
+ Bitstream *data;
+ int payloadSize;
+} ToneMappingSEI;
+
+#endif
+
+void InterpretSEIMessage(byte* msg, int size, VideoParameters *p_Vid);
+void interpret_spare_pic( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_subsequence_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_subsequence_layer_characteristics_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_subsequence_characteristics_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_scene_information( byte* payload, int size, VideoParameters *p_Vid ); // JVT-D099
+void interpret_user_data_registered_itu_t_t35_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_user_data_unregistered_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_pan_scan_rect_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_recovery_point_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_filler_payload_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_dec_ref_pic_marking_repetition_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_full_frame_freeze_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_full_frame_freeze_release_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_full_frame_snapshot_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_progressive_refinement_start_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_progressive_refinement_end_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_motion_constrained_slice_group_set_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_reserved_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_buffering_period_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_picture_timing_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_film_grain_characteristics_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_deblocking_filter_display_preference_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_stereo_video_info_info( byte* payload, int size, VideoParameters *p_Vid );
+void interpret_post_filter_hints_info( byte* payload, int size, VideoParameters *p_Vid );
+// functions for tone mapping SEI message
+void interpret_tone_mapping( byte* payload, int size, VideoParameters *p_Vid );
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+void tone_map(imgpel** imgX, imgpel* lut, int size_x, int size_y);
+void init_tone_mapping_sei(ToneMappingSEI *seiToneMapping);
+void update_tone_mapping_sei(ToneMappingSEI *seiToneMapping);
+#endif
+#endif
diff --git a/Src/h264dec/ldecod/inc/transform8x8.h b/Src/h264dec/ldecod/inc/transform8x8.h
new file mode 100644
index 00000000..88cfafee
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/transform8x8.h
@@ -0,0 +1,24 @@
+/*!
+ ***************************************************************************
+ *
+ * \file transform8x8.h
+ *
+ * \brief
+ * prototypes of 8x8 transform functions
+ *
+ * \date
+ * 9. October 2003
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Yuri Vatis
+ **************************************************************************/
+
+#ifndef _TRANSFORM8X8_H_
+#define _TRANSFORM8X8_H_
+
+extern void itrans8x8_mmx(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x);
+extern void itrans8x8_sse2(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x);
+extern void itrans8x8_c(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x);
+extern void itrans8x8_lossless(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x);
+#endif
diff --git a/Src/h264dec/ldecod/inc/vlc.h b/Src/h264dec/ldecod/inc/vlc.h
new file mode 100644
index 00000000..9a75ad3a
--- /dev/null
+++ b/Src/h264dec/ldecod/inc/vlc.h
@@ -0,0 +1,122 @@
+
+/*!
+ ************************************************************************
+ * \file vlc.h
+ *
+ * \brief
+ * header for (CA)VLC coding functions
+ *
+ * \author
+ * Karsten Suehring
+ *
+ ************************************************************************
+ */
+
+#ifndef _VLC_H_
+#define _VLC_H_
+
+#include <bfc/platform/types.h>
+
+//! gives CBP value from codeword number, both for intra and inter; indexed [table][codeword number][column] — NOTE(review): which column is intra and which is inter is not visible here — confirm
+static const byte NCBP[2][48][2]=
+{
+ { // 0 1 2 3 4 5 6 7 8 9 10 11 (only codewords 0..15 are populated in this first table; the rest are zero filler)
+ {15, 0},{ 0, 1},{ 7, 2},{11, 4},{13, 8},{14, 3},{ 3, 5},{ 5,10},{10,12},{12,15},{ 1, 7},{ 2,11},
+ { 4,13},{ 8,14},{ 6, 6},{ 9, 9},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},
+ { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},
+ { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0}
+ },
+ {
+ {47, 0},{31,16},{15, 1},{ 0, 2},{23, 4},{27, 8},{29,32},{30, 3},{ 7, 5},{11,10},{13,12},{14,15},
+ {39,47},{43, 7},{45,11},{46,13},{16,14},{ 3, 6},{ 5, 9},{10,31},{12,35},{19,37},{21,42},{26,44},
+ {28,33},{35,34},{37,36},{42,40},{44,39},{ 1,43},{ 2,45},{ 4,46},{ 8,17},{17,18},{18,20},{20,24},
+ {24,19},{ 6,21},{ 9,26},{22,28},{25,23},{32,27},{33,29},{34,30},{36,22},{40,25},{38,38},{41,41}
+ }
+};
+
+//! for the linfo_levrun_inter routine: 4 rows of 8 two-byte pairs; unused trailing slots are {0,0}
+static const byte NTAB1[4][8][2] =
+{
+ {{1,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}},
+ {{1,1},{1,2},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}},
+ {{2,0},{1,3},{1,4},{1,5},{0,0},{0,0},{0,0},{0,0}},
+ {{3,0},{2,1},{2,2},{1,6},{1,7},{1,8},{1,9},{4,0}},
+};
+
+static const byte LEVRUN1[16]= // NOTE(review): presumably a per-run level limit used together with NTAB1/NTAB2 by linfo_levrun_inter — confirm
+{
+ 4,2,2,1,1,1,1,1,1,1,0,0,0,0,0,0,
+};
+
+
+static const byte NTAB2[4][8][2] = // same shape as NTAB1; NOTE(review): presumably the second lookup table of linfo_levrun_inter — confirm
+{
+ {{1,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}},
+ {{1,1},{2,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}},
+ {{1,2},{3,0},{4,0},{5,0},{0,0},{0,0},{0,0},{0,0}},
+ {{1,3},{1,4},{2,1},{3,1},{6,0},{7,0},{8,0},{9,0}},
+};
+
+//! for the linfo_levrun_c2x2 routine
+static const byte LEVRUN3[4] =
+{
+ 2,1,0,0
+};
+
+static const byte NTAB3[2][2][2] = // 2 rows of 2 two-byte pairs; NOTE(review): presumably paired with LEVRUN3 for linfo_levrun_c2x2 — confirm
+{
+ {{1,0},{0,0}},
+ {{2,0},{1,1}},
+};
+
+extern int se_v (const char *tracestring, Bitstream *bitstream);
+extern int ue_v (const char *tracestring, Bitstream *bitstream);
+extern Boolean u_1 (const char *tracestring, Bitstream *bitstream);
+extern int u_v (int LenInBits, const char *tracestring, Bitstream *bitstream);
+extern int i_v (int LenInBits, const char *tracestring, Bitstream *bitstream);
+
+// CAVLC mapping
+extern void linfo_ue(int len, int info, int *value1, int *dummy);
+extern void linfo_se(int len, int info, int *value1, int *dummy);
+
+extern void linfo_cbp_intra_normal(int len,int info,int *cbp, int *dummy);
+extern void linfo_cbp_inter_normal(int len,int info,int *cbp, int *dummy);
+extern void linfo_cbp_intra_other(int len,int info,int *cbp, int *dummy);
+extern void linfo_cbp_inter_other(int len,int info,int *cbp, int *dummy);
+
+extern void linfo_levrun_inter(int len,int info,int *level,int *irun);
+extern void linfo_levrun_c2x2(int len,int info,int *level,int *irun);
+
+extern int uvlc_startcode_follows(Slice *currSlice, int dummy);
+
+extern int readSyntaxElement_VLC (SyntaxElement *sym, Bitstream *currStream);
+extern int readSyntaxElement_UVLC(SyntaxElement *, struct datapartition *);
+extern int readSyntaxElement_Intra4x4PredictionMode(SyntaxElement *sym, Bitstream *currStream);
+
+extern int GetVLCSymbol (const uint8_t buffer[],int totbitoffset,int *info, int bytecount);
+extern int GetVLCSymbol_IntraMode (const uint8_t buffer[],int totbitoffset,int *info, int bytecount);
+
+extern int readSyntaxElement_FLC (Bitstream *currStream, int numbits);
+extern int readSyntaxElement_NumCoeffTrailingOnes (SyntaxElement *sym, Bitstream *currStream, int vlcnum);
+extern int readSyntaxElement_NumCoeffTrailingOnesChromaDC(VideoParameters *p_Vid, SyntaxElement *sym, Bitstream *currStream);
+extern int readSyntaxElement_Level_VLC0 (Bitstream *currStream);
+extern int readSyntaxElement_Level_VLCN (int vlc, Bitstream *currStream);
+extern int readSyntaxElement_TotalZeros (Bitstream *currStream, int vlcnum);
+extern int readSyntaxElement_TotalZerosChromaDC (VideoParameters *p_Vid, Bitstream *currStream, int vlcnum);
+extern int readSyntaxElement_Run (Bitstream *currStream, int vlcnum);
+extern int GetBits (const uint8_t buffer[],int totbitoffset,int *info, int bitcount, int numbits);
+
+
+extern int more_rbsp_data (const uint8_t buffer[],int totbitoffset,int bytecount);
+
+int code_from_bitstream_2d_17_4_c(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab);
+int code_from_bitstream_2d_17_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab);
+
+int code_from_bitstream_2d_5_4_c(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab);
+int code_from_bitstream_2d_5_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab);
+
+int code_from_bitstream_2d_16_1_c(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab);
+int code_from_bitstream_2d_16_1_sse2(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab);
+
+#endif
+
diff --git a/Src/h264dec/ldecod/src/biari.asm b/Src/h264dec/ldecod/src/biari.asm
new file mode 100644
index 00000000..ca5f3d6d
--- /dev/null
+++ b/Src/h264dec/ldecod/src/biari.asm
@@ -0,0 +1,2540 @@
+.686
+.XMM
+.model FLAT
+
+; Slice: hard-coded byte offsets of Slice members, used as displacements by the code below (presumably must match the C struct layout -- TODO confirm against global.h)
+tex_ctx@Slice = 100
+coeff@Slice = 15632
+coeff_ctr@Slice = 15760
+pos@Slice = 15764
+last_dquant@Slice = 88
+mot_ctx@Slice = 96
+slice_type@Slice = 64
+
+; VideoParameters: hard-coded byte offsets of VideoParameters members
+structure@VideoParameters = 697200
+dec_picture@VideoParameters = 698192
+bitdepth_chroma_qp_scale@VideoParameters = 697456
+
+; Macroblock: hard-coded byte offsets of Macroblock members
+p_Slice@Macroblock = 0
+p_Vid@Macroblock = 4
+qp@macroblock = 60 ; NOTE(review): spelled with a lowercase 'm', unlike the other Macroblock offsets
+qp_scaled@Macroblock = 72
+mb_field@Macroblock = 344
+read_and_store_CBP_block_bit@Macroblock = 400
+
+; StorablePicture: hard-coded byte offsets of StorablePicture members
+structure@StorablePicture = 0
+chroma_qp_offset@StorablePicture = 158688
+
+; TextureInfoContexts: hard-coded byte offsets of the context arrays inside tex_ctx
+map_contexts@TextureInfoContexts = 436
+last_contexts@TextureInfoContexts = 3252
+one_contexts@TextureInfoContexts = 6068 ; same value SigCoefFunction uses as a literal
+abs_contexts@TextureInfoContexts = 6508 ; same value SigCoefFunction uses as a literal
+
+_DATA SEGMENT ; three tables of 22 DWORD pointers each, one slot per block type (0..21)
+_pos2ctx_map DD FLAT:_pos2ctx_map4x4 ; position->context maps; same tables as the PosCtxMap arguments of the SigMapFunction instantiations
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map8x8
+ DD FLAT:_pos2ctx_map8x4
+ DD FLAT:_pos2ctx_map8x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map2x4c
+ DD FLAT:_pos2ctx_map4x4c
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map8x8
+ DD FLAT:_pos2ctx_map8x4
+ DD FLAT:_pos2ctx_map8x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map8x8
+ DD FLAT:_pos2ctx_map8x4
+ DD FLAT:_pos2ctx_map8x4
+ DD FLAT:_pos2ctx_map4x4
+_pos2ctx_map_int DD FLAT:_pos2ctx_map4x4 ; variant read by SigMapFunction as _pos2ctx_map_int[type*4] when structure + mb_field is non-zero
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map8x8i
+ DD FLAT:_pos2ctx_map8x4i
+ DD FLAT:_pos2ctx_map4x8i ; NOTE(review): slot 4 uses the 4x8i map while the corresponding slots 14 and 20 use 8x4i -- confirm this is intended
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map2x4c
+ DD FLAT:_pos2ctx_map4x4c
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map8x8i
+ DD FLAT:_pos2ctx_map8x4i
+ DD FLAT:_pos2ctx_map8x4i
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map4x4
+ DD FLAT:_pos2ctx_map8x8i
+ DD FLAT:_pos2ctx_map8x4i
+ DD FLAT:_pos2ctx_map8x4i
+ DD FLAT:_pos2ctx_map4x4
+_pos2ctx_last DD FLAT:_pos2ctx_last4x4 ; "last coefficient" context maps; same tables as the PosCtxLast arguments of the SigMapFunction instantiations
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last8x8
+ DD FLAT:_pos2ctx_last8x4
+ DD FLAT:_pos2ctx_last8x4
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last2x4c
+ DD FLAT:_pos2ctx_last4x4c
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last8x8
+ DD FLAT:_pos2ctx_last8x4
+ DD FLAT:_pos2ctx_last8x4
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last4x4
+ DD FLAT:_pos2ctx_last8x8
+ DD FLAT:_pos2ctx_last8x4
+ DD FLAT:_pos2ctx_last8x4
+ DD FLAT:_pos2ctx_last4x4
+_DATA ENDS
+
+CONST SEGMENT
+_rLPS_table_64x4 DB 080H ; rLPS lookup: four rows of 64 bytes; the decoders index it as (range AND 0C0H) + state. Row 0 (offset 0) starts here
+ DB 080H
+ DB 080H
+ DB 07bH
+ DB 074H
+ DB 06fH
+ DB 069H
+ DB 064H
+ DB 05fH
+ DB 05aH
+ DB 055H
+ DB 051H
+ DB 04dH
+ DB 049H
+ DB 045H
+ DB 042H
+ DB 03eH
+ DB 03bH
+ DB 038H
+ DB 035H
+ DB 033H
+ DB 030H
+ DB 02eH
+ DB 02bH
+ DB 029H
+ DB 027H
+ DB 025H
+ DB 023H
+ DB 021H
+ DB 020H
+ DB 01eH
+ DB 01dH
+ DB 01bH
+ DB 01aH
+ DB 018H
+ DB 017H
+ DB 016H
+ DB 015H
+ DB 014H
+ DB 013H
+ DB 012H
+ DB 011H
+ DB 010H
+ DB 0fH
+ DB 0eH
+ DB 0eH
+ DB 0dH
+ DB 0cH
+ DB 0cH
+ DB 0bH
+ DB 0bH
+ DB 0aH
+ DB 0aH
+ DB 09H
+ DB 09H
+ DB 08H
+ DB 08H
+ DB 07H
+ DB 07H
+ DB 07H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 02H
+ DB 0b0H ; row 1 (offset 64)
+ DB 0a7H
+ DB 09eH
+ DB 096H
+ DB 08eH
+ DB 087H
+ DB 080H
+ DB 07aH
+ DB 074H
+ DB 06eH
+ DB 068H
+ DB 063H
+ DB 05eH
+ DB 059H
+ DB 055H
+ DB 050H
+ DB 04cH
+ DB 048H
+ DB 045H
+ DB 041H
+ DB 03eH
+ DB 03bH
+ DB 038H
+ DB 035H
+ DB 032H
+ DB 030H
+ DB 02dH
+ DB 02bH
+ DB 029H
+ DB 027H
+ DB 025H
+ DB 023H
+ DB 021H
+ DB 01fH
+ DB 01eH
+ DB 01cH
+ DB 01bH
+ DB 01aH
+ DB 018H
+ DB 017H
+ DB 016H
+ DB 015H
+ DB 014H
+ DB 013H
+ DB 012H
+ DB 011H
+ DB 010H
+ DB 0fH
+ DB 0eH
+ DB 0eH
+ DB 0dH
+ DB 0cH
+ DB 0cH
+ DB 0bH
+ DB 0bH
+ DB 0aH
+ DB 09H
+ DB 09H
+ DB 09H
+ DB 08H
+ DB 08H
+ DB 07H
+ DB 07H
+ DB 02H
+ DB 0d0H ; row 2 (offset 128)
+ DB 0c5H
+ DB 0bbH
+ DB 0b2H
+ DB 0a9H
+ DB 0a0H
+ DB 098H
+ DB 090H
+ DB 089H
+ DB 082H
+ DB 07bH
+ DB 075H
+ DB 06fH
+ DB 069H
+ DB 064H
+ DB 05fH
+ DB 05aH
+ DB 056H
+ DB 051H
+ DB 04dH
+ DB 049H
+ DB 045H
+ DB 042H
+ DB 03fH
+ DB 03bH
+ DB 038H
+ DB 036H
+ DB 033H
+ DB 030H
+ DB 02eH
+ DB 02bH
+ DB 029H
+ DB 027H
+ DB 025H
+ DB 023H
+ DB 021H
+ DB 020H
+ DB 01eH
+ DB 01dH
+ DB 01bH
+ DB 01aH
+ DB 019H
+ DB 017H
+ DB 016H
+ DB 015H
+ DB 014H
+ DB 013H
+ DB 012H
+ DB 011H
+ DB 010H
+ DB 0fH
+ DB 0fH
+ DB 0eH
+ DB 0dH
+ DB 0cH
+ DB 0cH
+ DB 0bH
+ DB 0bH
+ DB 0aH
+ DB 0aH
+ DB 09H
+ DB 09H
+ DB 08H
+ DB 02H
+ DB 0f0H ; row 3 (offset 192)
+ DB 0e3H
+ DB 0d8H
+ DB 0cdH
+ DB 0c3H
+ DB 0b9H
+ DB 0afH
+ DB 0a6H
+ DB 09eH
+ DB 096H
+ DB 08eH
+ DB 087H
+ DB 080H
+ DB 07aH
+ DB 074H
+ DB 06eH
+ DB 068H
+ DB 063H
+ DB 05eH
+ DB 059H
+ DB 055H
+ DB 050H
+ DB 04cH
+ DB 048H
+ DB 045H
+ DB 041H
+ DB 03eH
+ DB 03bH
+ DB 038H
+ DB 035H
+ DB 032H
+ DB 030H
+ DB 02dH
+ DB 02bH
+ DB 029H
+ DB 027H
+ DB 025H
+ DB 023H
+ DB 021H
+ DB 01fH
+ DB 01eH
+ DB 01cH
+ DB 01bH
+ DB 019H
+ DB 018H
+ DB 017H
+ DB 016H
+ DB 015H
+ DB 014H
+ DB 013H
+ DB 012H
+ DB 011H
+ DB 010H
+ DB 0fH
+ DB 0eH
+ DB 0eH
+ DB 0dH
+ DB 0cH
+ DB 0cH
+ DB 0bH
+ DB 0bH
+ DB 0aH
+ DB 09H
+ DB 02H
+_AC_next_state_MPS_64 DB 01H ; 64 bytes indexed by the current state: state stored back by the decoders on the MPS path
+ DB 02H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0eH
+ DB 0fH
+ DB 010H
+ DB 011H
+ DB 012H
+ DB 013H
+ DB 014H
+ DB 015H
+ DB 016H
+ DB 017H
+ DB 018H
+ DB 019H
+ DB 01aH
+ DB 01bH
+ DB 01cH
+ DB 01dH
+ DB 01eH
+ DB 01fH
+ DB 020H
+ DB 021H
+ DB 022H
+ DB 023H
+ DB 024H
+ DB 025H
+ DB 026H
+ DB 027H
+ DB 028H
+ DB 029H
+ DB 02aH
+ DB 02bH
+ DB 02cH
+ DB 02dH
+ DB 02eH
+ DB 02fH
+ DB 030H
+ DB 031H
+ DB 032H
+ DB 033H
+ DB 034H
+ DB 035H
+ DB 036H
+ DB 037H
+ DB 038H
+ DB 039H
+ DB 03aH
+ DB 03bH
+ DB 03cH
+ DB 03dH
+ DB 03eH
+ DB 03eH ; state 62 maps to itself
+ DB 03fH ; state 63 maps to itself
+_AC_next_state_LPS_64 DB 00H ; 64 bytes indexed by the current state: state stored back by the decoders on the LPS path
+ DB 00H
+ DB 01H
+ DB 02H
+ DB 02H
+ DB 04H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 09H
+ DB 0bH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0dH
+ DB 0fH
+ DB 0fH
+ DB 010H
+ DB 010H
+ DB 012H
+ DB 012H
+ DB 013H
+ DB 013H
+ DB 015H
+ DB 015H
+ DB 016H
+ DB 016H
+ DB 017H
+ DB 018H
+ DB 018H
+ DB 019H
+ DB 01aH
+ DB 01aH
+ DB 01bH
+ DB 01bH
+ DB 01cH
+ DB 01dH
+ DB 01dH
+ DB 01eH
+ DB 01eH
+ DB 01eH
+ DB 01fH
+ DB 020H
+ DB 020H
+ DB 021H
+ DB 021H
+ DB 021H
+ DB 022H
+ DB 022H
+ DB 023H
+ DB 023H
+ DB 023H
+ DB 024H
+ DB 024H
+ DB 024H
+ DB 025H
+ DB 025H
+ DB 025H
+ DB 026H
+ DB 026H
+ DB 03fH ; state 63 maps to itself
+_renorm_table_32 DB 06H ; 32-entry shift table; the decoders visible in this part of the file read _renorm_table_256 instead, despite comments there that still mention renorm_table_32[rLPS>>3]
+ DB 05H
+ DB 04H
+ DB 04H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ _renorm_table_256 DB 06H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+_maxpos DB 0fH ; 22 bytes, one per block type (0..21); values equal the MaxPos arguments of the SigMapFunction/SigCoefFunction instantiations
+ DB 0eH
+ DB 03fH
+ DB 01fH
+ DB 01fH
+ DB 0fH
+ DB 03H
+ DB 0eH
+ DB 07H
+ DB 0fH
+ DB 0fH
+ DB 0eH
+ DB 03fH
+ DB 01fH
+ DB 01fH
+ DB 0fH
+ DB 0fH
+ DB 0eH
+ DB 03fH
+ DB 01fH
+ DB 01fH
+ DB 0fH
+ ORG $+2 ; skip two bytes after the 22 entries
+_c1isdc DB 01H ; 22 bytes, one per block type (0..21); values equal the IsDC arguments of the SigMapFunction instantiations (0 for types 1, 7, 11 and 17)
+ DB 00H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 00H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 00H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 00H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ ORG $+2 ; skip two bytes after the 22 entries
+_type2ctx_bcbp DB 00H ; 22 bytes, one per block type (0..21); same values as _type2ctx_one and _type2ctx_abs. Not referenced in the visible part of this file
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 05H
+ DB 05H
+ DB 0aH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0dH
+ DB 0eH
+ DB 010H
+ DB 011H
+ DB 012H
+ DB 013H
+ DB 013H
+ DB 014H
+ ORG $+2 ; skip two bytes after the 22 entries
+_type2ctx_map DW 00H ; 22 words, one per block type (0..21); each value is 16 times the TypeCtxMap argument of the matching SigMapFunction instantiation
+DW 010H
+DW 020H
+DW 030H
+DW 040H
+DW 050H
+DW 060H
+DW 070H
+DW 060H
+DW 060H
+DW 0A0H
+DW 0B0H
+DW 0C0H
+DW 0D0H
+DW 0E0H
+DW 0F0H
+DW 0100H
+DW 0110H
+DW 0120H
+DW 0130H
+DW 0140H
+DW 0150H
+ ORG $+2 ; skip two bytes after the 22 words
+_type2ctx_last DW 00H ; 22 words, one per block type (0..21); each value is 16 times the TypeCtxLast argument of the matching SigMapFunction instantiation
+DW 010H
+DW 020H
+DW 030H
+DW 040H
+DW 050H
+DW 060H
+DW 070H
+DW 060H
+DW 060H
+DW 0A0H
+DW 0B0H
+DW 0C0H
+DW 0D0H
+DW 0E0H
+DW 0F0H
+DW 0100H
+DW 0110H
+DW 0120H
+DW 0130H
+DW 0140H
+DW 0150H
+ ORG $+2 ; skip two bytes after the 22 words
+_type2ctx_one DB 00H ; 22 bytes, one per block type (0..21); values equal the TypeCtxOne arguments of the SigCoefFunction instantiations
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 05H
+ DB 05H
+ DB 0aH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0dH
+ DB 0eH
+ DB 010H
+ DB 011H
+ DB 012H
+ DB 013H
+ DB 013H
+ DB 014H
+ ORG $+2 ; skip two bytes after the 22 entries
+_type2ctx_abs DB 00H ; 22 bytes, one per block type (0..21); values equal the TypeCtxAbs arguments of the SigCoefFunction instantiations
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 05H
+ DB 05H
+ DB 0aH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0dH
+ DB 0eH
+ DB 010H
+ DB 011H
+ DB 012H
+ DB 013H
+ DB 013H
+ DB 014H
+ ORG $+2 ; skip two bytes after the 22 entries
+plus_one_clip4 DD 1,2,3,4,4 ; next-value table: entry i is i+1, saturating at 4 (used by SigCoefFunction to advance c2)
+plus_one_clip3 DD 1,2,3,3 ; same, saturating at 3
+_max_c2 DD plus_one_clip4 ; 22 pointers, one per block type (0..21); same tables as the MaxC2 arguments of the SigCoefFunction instantiations
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip3 ; type 6
+ DD plus_one_clip4
+ DD plus_one_clip3 ; type 8
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ DD plus_one_clip4
+ ORG $+6 ; skip six bytes after the 22 pointers
+_pos2ctx_map8x8 DB 00H ; 64 bytes: context index per coefficient position; SigMapFunction reads map[i] and scales it by 4 to address a context
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 05H
+ DB 04H
+ DB 04H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 05H
+ DB 05H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 03H
+ DB 03H
+ DB 06H
+ DB 07H
+ DB 07H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 09H
+ DB 08H
+ DB 07H
+ DB 07H
+ DB 06H
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0bH
+ DB 06H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 0eH
+ DB 0aH
+ DB 09H
+ DB 08H
+ DB 06H
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0bH
+ DB 06H
+ DB 09H
+ DB 0eH
+ DB 0aH
+ DB 09H
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0bH
+ DB 0eH
+ DB 0aH
+ DB 0cH
+ DB 0eH
+_pos2ctx_map8x4 DB 00H ; 32 bytes: context index per coefficient position (PosCtxMap argument for types 3, 4, 13, 14, 19 and 20)
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 0bH
+ DB 09H
+ DB 08H
+ DB 06H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 0bH
+ DB 09H
+ DB 08H
+ DB 06H
+ DB 0cH
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 0bH
+ DB 09H
+ DB 0dH
+ DB 0dH
+ DB 0eH
+ DB 0eH
+_pos2ctx_map4x4 DB 00H ; 16 bytes: context index per coefficient position for the 4x4-style block types
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0eH
+ DB 0eH
+_pos2ctx_map2x4c DB 00H ; 16 bytes: PosCtxMap argument for type 8
+ DB 00H
+ DB 01H
+ DB 01H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+_pos2ctx_map4x4c DB 00H ; 16 bytes: PosCtxMap argument for type 9
+ DB 00H
+ DB 00H
+ DB 00H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+_pos2ctx_map8x8i DB 00H ; 64 bytes: 8x8 context map reachable through _pos2ctx_map_int (the variant SigMapFunction selects when structure + mb_field is non-zero)
+ DB 01H
+ DB 01H
+ DB 02H
+ DB 02H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 07H
+ DB 07H
+ DB 07H
+ DB 08H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 09H
+ DB 0aH
+ DB 0aH
+ DB 08H
+ DB 0bH
+ DB 0cH
+ DB 0bH
+ DB 09H
+ DB 09H
+ DB 0aH
+ DB 0aH
+ DB 08H
+ DB 0bH
+ DB 0cH
+ DB 0bH
+ DB 09H
+ DB 09H
+ DB 0aH
+ DB 0aH
+ DB 08H
+ DB 0bH
+ DB 0cH
+ DB 0bH
+ DB 09H
+ DB 09H
+ DB 0aH
+ DB 0aH
+ DB 08H
+ DB 0dH
+ DB 0dH
+ DB 09H
+ DB 09H
+ DB 0aH
+ DB 0aH
+ DB 08H
+ DB 0dH
+ DB 0dH
+ DB 09H
+ DB 09H
+ DB 0aH
+ DB 0aH
+ DB 0eH
+ DB 0eH
+ DB 0eH
+ DB 0eH
+ DB 0eH
+ DB 0eH
+_pos2ctx_map8x4i DB 00H ; 32 bytes: context map reachable through _pos2ctx_map_int (slots 3, 13, 14, 19 and 20)
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 03H
+ DB 04H
+ DB 07H
+ DB 06H
+ DB 08H
+ DB 09H
+ DB 07H
+ DB 06H
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 0bH
+ DB 0cH
+ DB 0cH
+ DB 0aH
+ DB 0bH
+ DB 0dH
+ DB 0dH
+ DB 0eH
+ DB 0eH
+ DB 0eH
+_pos2ctx_map4x8i DB 00H ; 32 bytes: context map reachable through _pos2ctx_map_int (slot 4 only)
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 02H
+ DB 07H
+ DB 07H
+ DB 08H
+ DB 08H
+ DB 08H
+ DB 05H
+ DB 06H
+ DB 09H
+ DB 0aH
+ DB 0aH
+ DB 0bH
+ DB 0bH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0dH
+ DB 0eH
+ DB 0eH
+ DB 0eH
+_pos2ctx_last8x8 DB 00H ; 64 bytes: "last coefficient" context index per position; SigMapFunction reads it as PosCtxLast[i] (or [i+1] when IsDC = 0)
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 05H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 06H
+ DB 07H
+ DB 07H
+ DB 07H
+ DB 07H
+ DB 08H
+ DB 08H
+ DB 08H
+ DB 08H
+_pos2ctx_last8x4 DB 00H ; 32 bytes: "last coefficient" context index per position (PosCtxLast argument for types 3, 4, 13, 14, 19 and 20)
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 01H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 03H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 04H
+ DB 05H
+ DB 05H
+ DB 06H
+ DB 06H
+ DB 07H
+ DB 07H
+ DB 08H
+ DB 08H
+_pos2ctx_last4x4 DB 00H ; 16 bytes: identity mapping 0..15 of position to "last coefficient" context
+ DB 01H
+ DB 02H
+ DB 03H
+ DB 04H
+ DB 05H
+ DB 06H
+ DB 07H
+ DB 08H
+ DB 09H
+ DB 0aH
+ DB 0bH
+ DB 0cH
+ DB 0dH
+ DB 0eH
+ DB 0fH
+_pos2ctx_last2x4c DB 00H ; 16 bytes: PosCtxLast argument for type 8
+ DB 00H
+ DB 01H
+ DB 01H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+ DB 02H
+_pos2ctx_last4x4c DB 00, 00, 00, 00, 01, 01, 01, 01, 02, 02, 02, 02, 02, 02, 02, 02 ; 16 bytes: PosCtxLast argument for type 9
+plus_one_clip0_4 DD 0,2,3,4,4 ; c1 update used by SigCoefFunction: 0 stays 0, otherwise i+1 saturating at 4
+
+align 16
+_QP_SCALE_CR DD 00H ; 52 DWORD entries (indices 0..51); identity up to index 29, then growing more slowly up to 39. Not referenced in the visible part of this file
+ DD 01H
+ DD 02H
+ DD 03H
+ DD 04H
+ DD 05H
+ DD 06H
+ DD 07H
+ DD 08H
+ DD 09H
+ DD 0aH
+ DD 0bH
+ DD 0cH
+ DD 0dH
+ DD 0eH
+ DD 0fH
+ DD 010H
+ DD 011H
+ DD 012H
+ DD 013H
+ DD 014H
+ DD 015H
+ DD 016H
+ DD 017H
+ DD 018H
+ DD 019H
+ DD 01aH
+ DD 01bH
+ DD 01cH
+ DD 01dH
+ DD 01dH
+ DD 01eH
+ DD 01fH
+ DD 020H
+ DD 020H
+ DD 021H
+ DD 022H
+ DD 022H
+ DD 023H
+ DD 023H
+ DD 024H
+ DD 024H
+ DD 025H
+ DD 025H
+ DD 025H
+ DD 026H
+ DD 026H
+ DD 026H
+ DD 027H
+ DD 027H
+ DD 027H
+ DD 027H
+ align 16
+_51 DD 51 ; the constant 51 (the highest _QP_SCALE_CR index) kept in 16-byte-aligned memory
+CONST ENDS
+
+
+PUBLIC _biari_decode_symbol
+_TEXT SEGMENT
+dep = 4 ; stack offset of the first argument (dep), relative to esp at entry
+bi_ct = 8 ; stack offset of the second argument (bi_ct), relative to esp at entry
+_biari_decode_symbol PROC
+ STACKOFFSET=0 ; assembly-time count of bytes pushed so far, used to rebase the argument offsets
+ mov edx, DWORD PTR dep[esp+STACKOFFSET] ; edx = dep
+ STACKOFFSET=STACKOFFSET+4
+ push ebx
+ mov ebx, DWORD PTR bi_ct[esp+STACKOFFSET] ; ebx = bi_ct
+ movzx eax, WORD PTR [ebx] ; eax = state (16-bit field at bi_ct+0)
+ push ebp
+ push edi
+ STACKOFFSET = STACKOFFSET+8
+; dep fields used below: [dep]=range, [dep+4]=value, [dep+8]=DbitsLeft, [dep+12]=Dcodestrm, [dep+16]=Dcodestrm_len (a pointer). Returns the decoded bit in eax.
+ mov edi, DWORD PTR [edx] ; edi = range
+ mov ecx, edi ; ecx = range
+ and ecx, 0C0H ; keep bits 7:6 of range in place: row offset 0, 64, 128 or 192 into the table (no shift is performed)
+ movzx ebp, BYTE PTR _rLPS_table_64x4[ecx+eax] ; ebp = rLPS = table[row offset + state]
+
+ ; register state:
+ ; eax: state (bi_ct->state)
+ ; ebx: bi_ct
+ ; edx: dep
+ ; edi: range
+ ; ebp: rLPS
+
+ mov ecx, DWORD PTR [edx+8] ; ecx = bitsleft
+ sub edi, ebp ; range -= rLPS
+ shl edi, cl ; range << bitsleft
+ cmp DWORD PTR [edx+4], edi ; compare value with (range << bitsleft)
+ jge SHORT CABAC@LPS ; signed value >= (range << bitsleft): take the LPS path
+
+ movzx ax, BYTE PTR _AC_next_state_MPS_64[eax] ; ax = state = AC_next_state_MPS_64[state]
+ shr edi, cl ; undo earlier shift
+ mov WORD PTR [ebx], ax ; bi_ct->state = new state (the word at bi_ct+0)
+ cmp edi, 256 ; 00000100H
+ setb cl ; cl = 1 if range < 256 (one renormalisation shift), else 0
+
+ ; register state
+ ; eax: state (new)
+ ; ebx: bi_ct
+ ; ecx: shift count 0 or 1 (setb writes only cl; the upper bytes still come from bitsleft, so this relies on bitsleft < 256)
+ ; edx: dep
+ ; edi: range
+ ; ebp: rLPS
+
+ shl edi, cl
+ sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= shift count (0 or 1)
+ mov DWORD PTR [edx], edi ; dep->Drange = range
+ movzx eax, BYTE PTR [ebx+2] ; return value = byte at bi_ct+2 (left unchanged on this path)
+ jz SHORT READ_TWO_BYTES; ZF is still the result of the sub above (mov/movzx do not modify flags): taken if dep->DbitsLeft == 0
+
+ pop edi
+ pop ebp
+ pop ebx
+ ret 0
+align 16
+CABAC@LPS:
+ sub DWORD PTR [edx+4], edi ; value -= (range << bitsleft)
+ movzx cx, BYTE PTR _AC_next_state_LPS_64[eax] ; cx: state = AC_next_state_LPS_64[state]
+ mov WORD PTR [ebx], cx ; store the new state back to bi_ct->state (the word at bi_ct+0)
+
+ ; register state:
+ ; eax: state (old)
+ ; ebx: bi_ct
+ ; cx: state (new)
+ ; edx: dep
+ ; edi: range << bitsleft
+ ; ebp: rLPS
+
+ mov edi, ebx ; edi = bi_ct, because sete below overwrites the low byte of ebx
+ test eax, eax ; if state(old) == 0
+ movzx ecx, BYTE PTR _renorm_table_256[ebp] ; ecx = renorm = _renorm_table_256[rLPS] (movzx leaves ZF intact for sete)
+ sete bl ; bl = 1 [ if state(old) == 0 ]
+ movzx eax, BYTE PTR [edi+2] ; eax = byte at bi_ct+2
+ xor eax, 1 ; return value = that byte with bit 0 inverted
+ xor BYTE PTR [edi+2], bl ; flip the stored byte at bi_ct+2 when the old state was 0
+
+ ; register state:
+ ; eax: return bit
+ ; ebx: no longer bi_ct (bl holds the flip flag)
+ ; ecx: renorm
+ ; edx: dep
+ ; edi: bi_ct
+ ; ebp: rLPS
+
+ shl ebp, cl ; ebp = range = rLPS <<= renorm
+ sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= renorm;
+ mov DWORD PTR [edx], ebp ; dep->Drange = range;
+ jle SHORT READ_TWO_BYTES ; if( dep->DbitsLeft <= 0 )
+
+ ; register state:
+ ; eax: return bit
+ ; ebx: no longer bi_ct
+ ; ecx: renorm
+ ; edx: dep
+ ; edi: bi_ct
+ ; ebp: range = rLPS <<= renorm
+
+ pop edi
+ pop ebp
+ pop ebx
+ ret 0
+align 16
+READ_TWO_BYTES:
+
+ ; register state:
+ ; eax: return bit (not touched below)
+ ; ebx, ecx, edi: free to use as scratch
+ ; edx: dep
+ ; NOTE(review): no check against the end of the stream buffer is visible in this refill path
+ ; value = (value << 16) | next two stream bytes; DbitsLeft += 16
+
+ mov ebx, DWORD PTR [edx+16] ; ebx = dep->Dcodestrm_len (pointer to the read position)
+ mov ecx, DWORD PTR [ebx] ; ecx = *dep->Dcodestrm_len
+ lea edi, DWORD PTR [ecx+2] ; edi = *dep->Dcodestrm_len + 2
+ mov DWORD PTR [ebx], edi ; *dep->Dcodestrm_len += 2
+ mov ebx, DWORD PTR [edx+12] ; ebx = dep->Dcodestrm
+ movzx ecx, WORD PTR [ebx+ecx] ; load 16 bits at the old read position
+ xchg cl, ch ; swap the two bytes so the first stream byte becomes the high byte
+ shl DWORD PTR [edx+4], 16 ; value <<= 16
+ mov WORD PTR [edx+4], cx ; low 16 bits of value = the new word
+
+ add DWORD PTR [edx+8], 16 ; dep->DbitsLeft += 16
+ ; eax already holds the decoded bit set on the MPS/LPS path above
+ pop edi
+ pop ebp
+ pop ebx
+ ret 0
+_biari_decode_symbol ENDP
+_TEXT ENDS
+
+; _biari_decode_symbol_map
+; register-argument variant of _biari_decode_symbol for use from the macros in this file
+; same decoding steps as _biari_decode_symbol, slightly optimized
+; pass dep in edx and ctx in eax. edx retains dep on exit; the decoded bit is returned in eax; ebx, ebp, edi are saved and restored; ecx is clobbered
+
+_TEXT SEGMENT
+_biari_decode_symbol_map PROC NEAR
+ STACKOFFSET=0
+ push ebx
+ STACKOFFSET=4
+ movzx ebx, WORD PTR [eax] ; ebx = state (16-bit field at ctx+0)
+ push ebp
+ push edi
+ STACKOFFSET = 12
+; dep fields used below: [dep]=range, [dep+4]=value, [dep+8]=DbitsLeft, [dep+12]=Dcodestrm, [dep+16]=Dcodestrm_len (a pointer)
+ mov edi, DWORD PTR [edx] ; edi = range
+ mov ecx, edi ; ecx = range
+ and ecx, 0C0H ; keep bits 7:6 of range in place: row offset 0, 64, 128 or 192 into the table (no shift is performed)
+ movzx ebp, BYTE PTR _rLPS_table_64x4[ecx+ebx] ; ebp = rLPS = table[row offset + state]
+
+ ; register state:
+ ; ebx: state (bi_ct->state)
+ ; eax: bi_ct
+ ; edx: dep
+ ; edi: range
+ ; ebp: rLPS
+
+ mov ecx, DWORD PTR [edx+8] ; ecx = bitsleft
+ sub edi, ebp ; range -= rLPS
+ shl edi, cl ; range << bitsleft
+ cmp DWORD PTR [edx+4], edi ; compare value with (range << bitsleft)
+ jge SHORT CABAC_OPT@LPS ; signed value >= (range << bitsleft): take the LPS path
+; MPS
+ movzx bx, BYTE PTR _AC_next_state_MPS_64[ebx] ; bx = state = AC_next_state_MPS_64[state]
+ shr edi, cl ; undo earlier shift
+ mov WORD PTR [eax], bx ; bi_ct->state = new state (the word at bi_ct+0)
+ cmp edi, 256 ; 00000100H
+ setb cl ; cl = 1 if range < 256 (one renormalisation shift), else 0
+
+ ; register state
+ ; ebx: state (new)
+ ; eax: bi_ct
+ ; ecx: shift count 0 or 1 (setb writes only cl; the upper bytes still come from bitsleft, so this relies on bitsleft < 256)
+ ; edx: dep
+ ; edi: range
+ ; ebp: rLPS
+
+ shl edi, cl
+ sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= shift count (0 or 1)
+ mov DWORD PTR [edx], edi ; dep->Drange = range
+ movzx eax, BYTE PTR [eax+2] ; return value = byte at bi_ct+2 (left unchanged on this path)
+ jz SHORT READ_TWO_BYTES ; ZF is still the result of the sub above (mov/movzx do not modify flags): taken if dep->DbitsLeft == 0
+
+ ; register state
+ ; ebx: state (new)
+ ; eax: return bit
+ ; ecx: shift count (0 or 1)
+ ; edx: dep
+ ; edi: range
+ ; ebp: rLPS
+
+ pop edi
+ pop ebp
+ pop ebx
+ ret 0
+align 16
+CABAC_OPT@LPS:
+ sub DWORD PTR [edx+4], edi ; value -= (range << bitsleft)
+ movzx cx, BYTE PTR _AC_next_state_LPS_64[ebx] ; cx: state = AC_next_state_LPS_64[state]
+ mov WORD PTR [eax], cx ; store the new state back to bi_ct->state (the word at bi_ct+0)
+
+ ; register state:
+ ; ebx: state (old)
+ ; eax: bi_ct
+ ; cx: state (new)
+ ; edx: dep
+ ; edi: range << bitsleft
+ ; ebp: rLPS
+
+ mov edi, eax ; edi = bi_ct, because eax is about to receive the return value
+ movzx eax, BYTE PTR [eax+2] ; eax = byte at bi_ct+2
+ xor eax, 1 ; return value = that byte with bit 0 inverted
+ test ebx, ebx ; if state(old) == 0
+ movzx ecx, BYTE PTR _renorm_table_256[ebp] ; ecx = renorm = _renorm_table_256[rLPS] (movzx leaves ZF intact for sete)
+ sete bl ; bl = 1 [ if state(old) == 0 ]
+ xor BYTE PTR [edi+2], bl ; flip the stored byte at bi_ct+2 when the old state was 0
+
+ ; register state:
+ ; ebx: old state with bl replaced by the flip flag
+ ; eax: return bit
+ ; ecx: renorm
+ ; edx: dep
+ ; edi: bi_ct
+ ; ebp: rLPS
+
+ shl ebp, cl ; ebp = range = rLPS <<= renorm
+ sub DWORD PTR [edx+8], ecx ; dep->DbitsLeft -= renorm;
+ mov DWORD PTR [edx], ebp ; dep->Drange = range;
+ jle SHORT READ_TWO_BYTES ; if( dep->DbitsLeft <= 0 )
+
+ ; register state:
+ ; ebx: scratch
+ ; eax: return bit
+ ; ecx: renorm
+ ; edx: dep
+ ; edi: bi_ct
+ ; ebp: range = rLPS <<= renorm
+
+ pop edi
+ pop ebp
+ pop ebx
+ ret 0
+align 16
+READ_TWO_BYTES:
+
+ ; register state:
+ ; eax: return bit (not touched below)
+ ; ebx, ecx, edi: free to use as scratch
+ ; edx: dep
+ ; NOTE(review): no check against the end of the stream buffer is visible in this refill path
+ ; value = (value << 16) | next two stream bytes; DbitsLeft += 16
+
+ mov ebx, DWORD PTR [edx+16] ; ebx = dep->Dcodestrm_len (pointer to the read position)
+ mov ecx, DWORD PTR [ebx] ; ecx = *dep->Dcodestrm_len
+ lea edi, DWORD PTR [ecx+2] ; edi = *dep->Dcodestrm_len + 2
+ mov DWORD PTR [ebx], edi ; *dep->Dcodestrm_len += 2
+ mov ebx, DWORD PTR [edx+12] ; ebx = dep->Dcodestrm
+ movzx ecx, WORD PTR [ebx+ecx] ; load 16 bits at the old read position
+ xchg cl, ch ; swap the two bytes so the first stream byte becomes the high byte
+ shl DWORD PTR [edx+4], 16 ; value <<= 16
+ mov WORD PTR [edx+4], cx ; low 16 bits of value = the new word
+
+ add DWORD PTR [edx+8], 16 ; dep->DbitsLeft += 16
+ pop edi
+ pop ebp
+ pop ebx
+ ret 0
+
+_biari_decode_symbol_map ENDP
+_TEXT ENDS
+
+
+; SigCoefFunction: body of the _read_significant_coefficientsN procedures.
+; Walks coeff[] from the index in esi down to 0. Every non-zero entry (set to 1
+; by the significance map) gets its remaining magnitude and its sign decoded.
+;
+; Macro arguments:
+;   MaxC2      - DWORD table giving the next c2 value (plus_one_clip3 / plus_one_clip4)
+;   TypeCtxOne - row of tex_ctx->one_contexts to use (rows are 20 bytes apart)
+;   TypeCtxAbs - row of tex_ctx->abs_contexts to use (rows are 20 bytes apart)
+;   MaxPos     - not referenced by the macro body
+;
+; Register contract:
+;   entry: ebp = tex_ctx, edx = dep, edi = coeff, esi = highest index to process
+;   entry: [esp] = esi value pushed by SigMapFunction; it is popped here before ret
+;   eax, ebx, ecx and ebp are NOT preserved (ebp becomes c1, ebx becomes c2);
+;   edx and edi are left unchanged by the code in this macro
+SigCoefFunction MACRO MaxC2, TypeCtxOne, TypeCtxAbs, MaxPos
+_abs_contexts$ = 28 ; local variable (safe because of how the function is called)
+_one_contexts$ = 32 ; local variable (safe because of how the function is called)
+STACKOFFSET=0
+ lea eax, DWORD PTR [ebp+TypeCtxOne*20+one_contexts@TextureInfoContexts]
+ mov DWORD PTR _one_contexts$[esp+STACKOFFSET], eax ; one_contexts = tex_ctx->one_contexts[type2ctx_one[type]];
+ ; nothing is pushed here: esi was already pushed by SigMapFunction
+ STACKOFFSET=STACKOFFSET+0
+
+ ; esi: i (loop variable), set up by SigMapFunction before the jump here
+ lea ecx, DWORD PTR [ebp+TypeCtxAbs*20+abs_contexts@TextureInfoContexts]
+ mov ebp, 1 ; ebp: c1
+ xor ebx, ebx ; ebx: c2
+ mov DWORD PTR _abs_contexts$[esp+STACKOFFSET], ecx ; abs_contexts = tex_ctx->abs_contexts[type2ctx_abs[type]];
+SIGN_COEFF@LOOP_AGAIN:
+ cmp WORD PTR [edi+esi*2], 0 ;if (coeff[i]!=0)
+ je SHORT SIGN_COEFF@LOOP_ITR
+ mov ecx, DWORD PTR _one_contexts$[esp+STACKOFFSET]
+ lea eax, DWORD PTR [ecx+ebp*4] ; eax = one_contexts + c1 (uses c1 before it is advanced)
+ mov ebp, DWORD PTR plus_one_clip0_4[ebp*4] ; c1 = plus_one_clip0_4[c1];
+ call _biari_decode_symbol_map ; biari_decode_symbol (dep_dp, one_contexts + c1);
+ test eax, eax
+ jz SHORT SIGN_COEFF@DECODE_EQ_PROB ; bit 0: magnitude stays 1, only the sign is read
+ mov ecx, DWORD PTR _abs_contexts$[esp+STACKOFFSET]
+ lea eax, DWORD PTR [ecx+ebx*4] ; eax = abs_contexts + c2
+ call _unary_exp_golomb_level_decode ;unary_exp_golomb_level_decode (dep_dp, abs_contexts + c2);
+ inc eax
+ add WORD PTR [edi+esi*2], ax ; coeff[i] += return val + 1
+ xor ebp, ebp ; c1 = 0
+ mov ebx, DWORD PTR MaxC2[ebx*4] ; c2 = MaxC2[c2]
+SIGN_COEFF@DECODE_EQ_PROB:
+ call _biari_decode_symbol_eq_prob_asm ; biari_decode_symbol_eq_prob(dep_dp), result returned in SF
+ js SHORT SIGN_COEFF@LOOP_ITR ; SF set: keep coeff[i] positive
+ neg WORD PTR [edi+esi*2]
+SIGN_COEFF@LOOP_ITR:
+ sub esi, 1
+ jns SHORT SIGN_COEFF@LOOP_AGAIN ; continue while i >= 0
+ pop esi ; restore the esi pushed by SigMapFunction
+ ret 0
+ENDM
+
+_TEXT SEGMENT ; one expansion of SigCoefFunction per block type 0..21; arguments are MaxC2, TypeCtxOne, TypeCtxAbs, MaxPos
+_read_significant_coefficients0 PROC ; reached by a jmp from _read_significance_map0; the same pairing holds for every N below
+SigCoefFunction plus_one_clip4, 0, 0, 15
+_read_significant_coefficients0 ENDP
+_read_significant_coefficients1 PROC
+SigCoefFunction plus_one_clip4, 1, 1, 14
+_read_significant_coefficients1 ENDP
+_read_significant_coefficients2 PROC
+SigCoefFunction plus_one_clip4, 2, 2, 63
+_read_significant_coefficients2 ENDP
+_read_significant_coefficients3 PROC
+SigCoefFunction plus_one_clip4, 3, 3, 31
+_read_significant_coefficients3 ENDP
+_read_significant_coefficients4 PROC ; same arguments as type 3
+SigCoefFunction plus_one_clip4, 3, 3, 31
+_read_significant_coefficients4 ENDP
+_read_significant_coefficients5 PROC
+SigCoefFunction plus_one_clip4, 4, 4, 15
+_read_significant_coefficients5 ENDP
+_read_significant_coefficients6 PROC ; c2 saturates at 3 for this type
+SigCoefFunction plus_one_clip3, 5, 5, 3
+_read_significant_coefficients6 ENDP
+_read_significant_coefficients7 PROC
+SigCoefFunction plus_one_clip4, 6, 6, 14
+_read_significant_coefficients7 ENDP
+_read_significant_coefficients8 PROC ; c2 saturates at 3 for this type; shares context row 5 with types 6 and 9
+SigCoefFunction plus_one_clip3, 5, 5, 7
+_read_significant_coefficients8 ENDP
+_read_significant_coefficients9 PROC
+SigCoefFunction plus_one_clip4, 5, 5, 15
+_read_significant_coefficients9 ENDP
+_read_significant_coefficients10 PROC
+SigCoefFunction plus_one_clip4, 10, 10, 15
+_read_significant_coefficients10 ENDP
+_read_significant_coefficients11 PROC
+SigCoefFunction plus_one_clip4, 11, 11, 14
+_read_significant_coefficients11 ENDP
+_read_significant_coefficients12 PROC
+SigCoefFunction plus_one_clip4, 12, 12, 63
+_read_significant_coefficients12 ENDP
+_read_significant_coefficients13 PROC
+SigCoefFunction plus_one_clip4, 13, 13, 31
+_read_significant_coefficients13 ENDP
+_read_significant_coefficients14 PROC ; same arguments as type 13
+SigCoefFunction plus_one_clip4, 13, 13, 31
+_read_significant_coefficients14 ENDP
+_read_significant_coefficients15 PROC
+SigCoefFunction plus_one_clip4, 14, 14, 15
+_read_significant_coefficients15 ENDP
+_read_significant_coefficients16 PROC
+SigCoefFunction plus_one_clip4, 16, 16, 15
+_read_significant_coefficients16 ENDP
+_read_significant_coefficients17 PROC
+SigCoefFunction plus_one_clip4, 17, 17, 14
+_read_significant_coefficients17 ENDP
+_read_significant_coefficients18 PROC
+SigCoefFunction plus_one_clip4, 18, 18, 63
+_read_significant_coefficients18 ENDP
+_read_significant_coefficients19 PROC
+SigCoefFunction plus_one_clip4, 19, 19, 31
+_read_significant_coefficients19 ENDP
+_read_significant_coefficients20 PROC ; same arguments as type 19
+SigCoefFunction plus_one_clip4, 19, 19, 31
+_read_significant_coefficients20 ENDP
+_read_significant_coefficients21 PROC
+SigCoefFunction plus_one_clip4, 20, 20, 15
+_read_significant_coefficients21 ENDP
+_TEXT ENDS
+
+
+; SigMapFunction: body of the _read_significance_mapN procedures; decodes the significance map into coeff[] and tail-jumps to Func
+; entry: eax = currSlice->coeff
+; entry: ecx = tex_ctx
+; entry: edi = currMB, ebp = dep, ebx = type; none of eax/ebx/ecx/edx/ebp/edi is preserved
+; entry: esi is pushed here and later re-read from [esp] as the Slice pointer, so it presumably holds currSlice -- TODO confirm at the call site
+; the two locals below live in stack slots borrowed from the caller's frame
+; Func is entered with the pushed esi still on the stack (SigCoefFunction pops it before returning)
+; at the jump to Func: edi = coeff, edx = dep, ebp = currSlice->tex_ctx, esi = index at which the loop stopped
+
+
+SigMapFunction MACRO PosCtxMap, TypeCtxLast, IsDC, MaxPos, PosCtxLast, TypeCtxMap, Func
+last_ctx$ = 24 ; local variable (cheating and using stack space from _readRunLevel_CABAC)
+coeff_ctr$ = 28 ; local variable (cheating and using stack space from _readRunLevel_CABAC)
+ STACKOFFSET=0
+ mov edx, DWORD PTR [edi+p_Vid@Macroblock] ; edx: p_Vid
+ push esi ; stays on the stack until the code at Func pops it
+ xor esi, esi ; esi = 0: loop index, and the zero source for cmovz below
+ STACKOFFSET=STACKOFFSET+4
+ mov edx, DWORD PTR [edx+structure@VideoParameters] ; edx = p_Vid->structure
+ add edx, DWORD PTR [edi+mb_field@Macroblock] ; + currMB->mb_field; ZF = 1 only when the sum is 0 (the mov/cmov/lea that follow keep the flags)
+ mov edi, eax ; edi: coeff
+ mov eax, 1408 ; 16 * 22 * sizeof(BiContextType): byte offset applied when the sum above is non-zero
+ cmovz eax, esi ; eax = 0 when the sum above was 0, else 1408
+ mov edx, OFFSET PosCtxMap
+ cmovnz edx, DWORD PTR _pos2ctx_map_int[ebx*4] ; non-zero sum: use the _pos2ctx_map_int entry for this type instead
+ IF IsDC EQ 0
+ lea ebx, [edx + 1] ; IsDC = 0: skip the first map entry, so loop index esi reads map position esi+1
+ ELSE
+ mov ebx, edx ; pos2ctx_Map = (fld) ? pos2ctx_map_int[type] : pos2ctx_map[type];
+ ENDIF
+ mov edx, ebp ; edx = dep, the register _biari_decode_symbol_map expects it in
+ lea ebp, [eax+ecx+TypeCtxMap*64+map_contexts@TextureInfoContexts] ; map_ctx = tex_ctx->map_contexts[fld][type2ctx_map [type]];
+ lea ecx, DWORD PTR [eax+ecx+TypeCtxLast*64+last_contexts@TextureInfoContexts]
+ mov DWORD PTR last_ctx$[esp+STACKOFFSET], ecx ; last_ctx = tex_ctx->last_contexts[fld][type2ctx_last[type]];
+ mov DWORD PTR coeff_ctr$[esp+STACKOFFSET], esi; coeff_ctr = 0
+ ; (disabled) jne LOOP_AGAIN
+
+ ; esi: i
+ ; ebx: pos2ctx_Map pointer (already advanced by one when IsDC = 0)
+ ; ebp: map_ctx; edx: dep_dp
+ ; edi: coeff
+; for (i=0; i < MaxPos; ++i) // if last coeff is reached, it has to be significant
+LOOP_AGAIN:
+
+; --- read significance symbol ---
+; if (biari_decode_symbol (dep_dp, map_ctx + pos2ctx_Map[i]))
+
+ movzx eax, BYTE PTR [esi+ebx] ; eax = pos2ctx_Map[i]
+ lea eax, DWORD PTR [ebp+eax*4] ; eax = map_ctx + pos2ctx_Map[i] (contexts are 4 bytes apart)
+ call _biari_decode_symbol_map
+ test eax, eax
+ mov WORD PTR [edi+esi*2], ax ; coeff[i] = biari_decode_symbol() (mov keeps the flags from test)
+ je SHORT LOOP_ITR
+
+; --- read last coefficient symbol ---
+; if (biari_decode_symbol (dep_dp, last_ctx + last[i]))
+
+ inc DWORD PTR coeff_ctr$[esp+STACKOFFSET] ; coeff_ctr++
+ IF IsDC EQ 0
+ movzx ecx, BYTE PTR PosCtxLast[esi+1] ; IsDC = 0: same one-position offset as the map pointer
+ ELSE
+ movzx ecx, BYTE PTR PosCtxLast[esi]
+ ENDIF
+ mov eax, DWORD PTR last_ctx$[esp+STACKOFFSET]
+ lea eax, DWORD PTR [eax+ecx*4] ; eax = last_ctx + PosCtxLast entry
+ call _biari_decode_symbol_map
+ test eax, eax
+ je SHORT LOOP_ITR
+
+ mov eax, DWORD PTR coeff_ctr$[esp+STACKOFFSET]; return coeff_ctr;
+ mov ecx, DWORD PTR [esp] ; ecx = the esi value pushed on entry, used as the Slice pointer
+ mov ebp, DWORD PTR [ecx+tex_ctx@Slice] ; ebp: currSlice->tex_ctx
+ mov DWORD PTR [ecx+coeff_ctr@Slice], eax ; currSlice->coeff_ctr = return value (read_significance_map)
+ jmp Func ; tail-jump: esi = index of the last significant coefficient
+ align 16
+LOOP_ITR:
+ inc esi
+ cmp esi, MaxPos
+ jl SHORT LOOP_AGAIN
+ mov eax, DWORD PTR coeff_ctr$[esp+STACKOFFSET]
+ mov WORD PTR [edi+esi*2], 1 ; loop ran out without a "last" flag: coeff[MaxPos] is forced to 1
+ inc eax ; and counted
+ mov ecx, DWORD PTR [esp] ; ecx = the esi value pushed on entry, used as the Slice pointer
+ mov ebp, DWORD PTR [ecx+tex_ctx@Slice] ; ebp: currSlice->tex_ctx
+ mov DWORD PTR [ecx+coeff_ctr@Slice], eax ; currSlice->coeff_ctr = return value (read_significance_map)
+ jmp Func ; tail-jump: esi = MaxPos
+ENDM
+
+_TEXT SEGMENT ; one expansion of SigMapFunction per block type 0..21; arguments are PosCtxMap, TypeCtxLast, IsDC, MaxPos, PosCtxLast, TypeCtxMap, Func
+_read_significance_map0 PROC ; each procedure ends by jumping to the _read_significant_coefficientsN named as its last argument
+SigMapFunction _pos2ctx_map4x4, 0, 1, 15, _pos2ctx_last4x4, 0, _read_significant_coefficients0
+_read_significance_map0 ENDP
+_read_significance_map1 PROC ; IsDC = 0: map and last tables are read one position further on
+SigMapFunction _pos2ctx_map4x4, 1, 0, 14, _pos2ctx_last4x4, 1, _read_significant_coefficients1
+_read_significance_map1 ENDP
+_read_significance_map2 PROC
+SigMapFunction _pos2ctx_map8x8, 2, 1, 63, _pos2ctx_last8x8, 2, _read_significant_coefficients2
+_read_significance_map2 ENDP
+_read_significance_map3 PROC
+SigMapFunction _pos2ctx_map8x4, 3, 1, 31, _pos2ctx_last8x4, 3, _read_significant_coefficients3
+_read_significance_map3 ENDP
+_read_significance_map4 PROC
+SigMapFunction _pos2ctx_map8x4, 4, 1, 31, _pos2ctx_last8x4, 4, _read_significant_coefficients4
+_read_significance_map4 ENDP
+_read_significance_map5 PROC
+SigMapFunction _pos2ctx_map4x4, 5, 1, 15, _pos2ctx_last4x4, 5, _read_significant_coefficients5
+_read_significance_map5 ENDP
+_read_significance_map6 PROC
+SigMapFunction _pos2ctx_map4x4, 6, 1, 3, _pos2ctx_last4x4, 6, _read_significant_coefficients6
+_read_significance_map6 ENDP
+_read_significance_map7 PROC ; IsDC = 0
+SigMapFunction _pos2ctx_map4x4, 7, 0, 14, _pos2ctx_last4x4, 7, _read_significant_coefficients7
+_read_significance_map7 ENDP
+_read_significance_map8 PROC ; shares context row 6 with types 6 and 9
+SigMapFunction _pos2ctx_map2x4c, 6, 1, 7, _pos2ctx_last2x4c, 6, _read_significant_coefficients8
+_read_significance_map8 ENDP
+_read_significance_map9 PROC
+SigMapFunction _pos2ctx_map4x4c, 6, 1, 15, _pos2ctx_last4x4c, 6, _read_significant_coefficients9
+_read_significance_map9 ENDP
+_read_significance_map10 PROC
+SigMapFunction _pos2ctx_map4x4, 10, 1, 15, _pos2ctx_last4x4, 10, _read_significant_coefficients10
+_read_significance_map10 ENDP
+_read_significance_map11 PROC ; IsDC = 0
+SigMapFunction _pos2ctx_map4x4, 11, 0, 14, _pos2ctx_last4x4, 11, _read_significant_coefficients11
+_read_significance_map11 ENDP
+_read_significance_map12 PROC
+SigMapFunction _pos2ctx_map8x8, 12, 1, 63, _pos2ctx_last8x8, 12, _read_significant_coefficients12
+_read_significance_map12 ENDP
+_read_significance_map13 PROC
+SigMapFunction _pos2ctx_map8x4, 13, 1, 31, _pos2ctx_last8x4, 13, _read_significant_coefficients13
+_read_significance_map13 ENDP
+_read_significance_map14 PROC
+SigMapFunction _pos2ctx_map8x4, 14, 1, 31, _pos2ctx_last8x4, 14, _read_significant_coefficients14
+_read_significance_map14 ENDP
+_read_significance_map15 PROC
+SigMapFunction _pos2ctx_map4x4, 15, 1, 15, _pos2ctx_last4x4, 15, _read_significant_coefficients15
+_read_significance_map15 ENDP
+_read_significance_map16 PROC
+SigMapFunction _pos2ctx_map4x4, 16, 1, 15, _pos2ctx_last4x4, 16, _read_significant_coefficients16
+_read_significance_map16 ENDP
+_read_significance_map17 PROC ; IsDC = 0
+SigMapFunction _pos2ctx_map4x4, 17, 0, 14, _pos2ctx_last4x4, 17, _read_significant_coefficients17
+_read_significance_map17 ENDP
+_read_significance_map18 PROC
+SigMapFunction _pos2ctx_map8x8, 18, 1, 63, _pos2ctx_last8x8, 18, _read_significant_coefficients18
+_read_significance_map18 ENDP
+_read_significance_map19 PROC
+SigMapFunction _pos2ctx_map8x4, 19, 1, 31, _pos2ctx_last8x4, 19, _read_significant_coefficients19
+_read_significance_map19 ENDP
+_read_significance_map20 PROC
+SigMapFunction _pos2ctx_map8x4, 20, 1, 31, _pos2ctx_last8x4, 20, _read_significant_coefficients20
+_read_significance_map20 ENDP
+_read_significance_map21 PROC
+SigMapFunction _pos2ctx_map4x4, 21, 1, 15, _pos2ctx_last4x4, 21, _read_significant_coefficients21
+_read_significance_map21 ENDP
+_TEXT ENDS
+
+
+_TEXT SEGMENT
+; in:  edx = dep (unchanged by function). eax and ecx are overwritten; esi is saved and restored.
+; out: SF holds the return value (SF set -> decoded bit 0, SF clear -> decoded bit 1); eax = updated dep->Dvalue
+_biari_decode_symbol_eq_prob_asm PROC
+ mov ecx, DWORD PTR [edx+8]; ecx: dep->DbitsLeft
+ dec ecx ; dep->DbitsLeft-- (ZF from here is consumed by the jnz below)
+ mov eax, DWORD PTR [edx+4] ; eax: dep->Dvalue (mov and push leave the flags alone)
+ push esi
+ jnz SHORT $LN3@biari_deco; skip the refill unless --(dep->DbitsLeft) == 0
+
+ mov ecx, DWORD PTR [edx+16] ; ecx: dep->Dcodestrm_len (pointer to the read position)
+ mov esi, DWORD PTR [ecx] ; esi: *dep->Dcodestrm_len
+ add DWORD PTR [ecx], 2 ; *dep->Dcodestrm_len += 2
+ mov ecx, DWORD PTR [edx+12] ; ecx: dep->Dcodestrm
+ shl eax, 16
+ mov ax, WORD PTR [ecx+esi] ; value = (value << 16) | getword( dep )
+ xchg ah, al ; swap the two loaded bytes so the first stream byte ends up as the high byte
+ mov ecx, 16 ; dep->DbitsLeft = 16;
+$LN3@biari_deco:
+ mov esi, DWORD PTR [edx] ; dep->Drange
+ shl esi, cl ; (dep->Drange << dep->DbitsLeft)
+ mov DWORD PTR [edx+8], ecx ; write back the updated dep->DbitsLeft
+ mov ecx, eax
+ sub ecx, esi ; ecx: tmp_value = value - (dep->Drange << dep->DbitsLeft); this sets the SF seen by the caller
+ pop esi
+
+ cmovns eax, ecx ; if (tmp_value >= 0) value = tmp_value
+ mov DWORD PTR [edx+4], eax ; dep->Dvalue = value;
+ ret 0
+_biari_decode_symbol_eq_prob_asm ENDP
+_TEXT ENDS
+
+_TEXT SEGMENT
+; edx: dep. retained on return. Result is returned in eax; edi is saved and restored.
+; esi and ebp are NOT retained, because the (only) calling function doesn't need them to be
+_exp_golomb_decode_eq_prob0 PROC
+STACKOFFSET=0
+ xor esi, esi ; esi: binary_symbol
+ xor ebp, ebp ; ebp: symbol
+ push edi
+ mov edi, 1 ; edi: bit weight (1 << k), starting at k = 0
+DECODE_EQ@LOOP_AGAIN:
+ call _biari_decode_symbol_eq_prob_asm ; l = biari_decode_symbol_eq_prob(dep_dp); SF set means l == 0
+ js SHORT DECODE_EQ@LOOP_DONE ; the prefix ends at the first 0 bit
+ add ebp, edi ; symbol += (1 << k)
+ shl edi, 1 ; k++ (the weight doubles)
+ jmp SHORT DECODE_EQ@LOOP_AGAIN
+ align 16
+DECODE_EQ@LOOP_DONE:
+ shr edi, 1 ; k-- (the weight halves)
+ jz SHORT DECODE_EQ@RETURN ; weight reached 0: every suffix bit has been read
+ call _biari_decode_symbol_eq_prob_asm ; if (biari_decode_symbol_eq_prob(dep_dp)==1)
+ js SHORT DECODE_EQ@LOOP_DONE ; bit was 0: nothing to set
+ or esi, edi ; binary_symbol |= (1<<k);
+ jmp SHORT DECODE_EQ@LOOP_DONE
+ align 16
+DECODE_EQ@RETURN:
+ lea eax, DWORD PTR [esi+ebp+13] ; return symbol + binary_symbol + 13 (the visible caller arrives after 12 unary bins; presumably the 13 folds in its offset - confirm)
+ pop edi
+ ret 0
+_exp_golomb_decode_eq_prob0 ENDP
+_TEXT ENDS
+
+;
+; Decodes a unary prefix on one context (at most 12 bins after the first) and, if it does not terminate, an escape suffix. Result in eax.
+; pass dep in edx, context in eax
+; edx is retained on return
+; ebp is destroyed
+
+_TEXT SEGMENT
+ctx = 4 ; second parameter (not referenced in this procedure; the context arrives in eax)
+_unary_exp_golomb_level_decode PROC
+ STACKOFFSET=0
+ mov ebp, eax ; eax (and now ebp also) contains the context pointer
+ call _biari_decode_symbol_map
+ test eax, eax ; if (symbol==0)
+ jne SHORT SYMBOL_NOT_ZERO
+ ret 0 ; return 0 (eax is already zero)
+align 16
+SYMBOL_NOT_ZERO:
+ push esi
+ xor esi, esi ; esi: symbol = 0
+LEVEL_DECODE@LOOP_AGAIN:
+
+ mov eax, ebp ; _biari_decode_symbol_map wants ctx in eax (relies on it leaving ebp and esi intact - it is defined outside this excerpt)
+ inc esi ; ++symbol;
+ call _biari_decode_symbol_map ; l = biari_decode_symbol(dep_dp, ctx);
+
+ test eax, eax ; if (!l)
+ je SHORT LEVEL_IS_ZERO
+ cmp esi, 12 ; exp_start-1
+ jb SHORT LEVEL_DECODE@LOOP_AGAIN
+
+ call _exp_golomb_decode_eq_prob0 ; exp_golomb_decode_eq_prob(dep_dp,0); overwrites esi/ebp and returns its sum plus 13 in eax
+ pop esi
+ ret 0
+align 16
+LEVEL_IS_ZERO:
+ mov eax, esi ; return symbol;
+ pop esi
+ ret 0
+_unary_exp_golomb_level_decode ENDP
+_TEXT ENDS
+
+CONST SEGMENT
+sigmap_functions DD FLAT:_read_significance_map0 ; dispatch table of 22 code pointers; _readRunLevel_CABAC calls sigmap_functions[ebx*4] with ebx = its context argument
+DD FLAT:_read_significance_map1 ; entries 0-2 refer to procedures defined before this excerpt
+DD FLAT:_read_significance_map2
+DD FLAT:_read_significance_map3
+DD FLAT:_read_significance_map4
+DD FLAT:_read_significance_map5
+DD FLAT:_read_significance_map6
+DD FLAT:_read_significance_map7
+DD FLAT:_read_significance_map8
+DD FLAT:_read_significance_map9
+DD FLAT:_read_significance_map10
+DD FLAT:_read_significance_map11
+DD FLAT:_read_significance_map12
+DD FLAT:_read_significance_map13
+DD FLAT:_read_significance_map14
+DD FLAT:_read_significance_map15
+DD FLAT:_read_significance_map16
+DD FLAT:_read_significance_map17
+DD FLAT:_read_significance_map18
+DD FLAT:_read_significance_map19
+DD FLAT:_read_significance_map20
+DD FLAT:_read_significance_map21
+CONST ENDS
+
+PUBLIC _readRunLevel_CABAC
+_TEXT SEGMENT
+_currMB$ = 4 ; first parameter
+_dep_dp$ = 8 ; second parameter
+_context$ = 12 ; third parameter
+; Returns the next (run, level) pair of the current block: level in eax, run (number of zero
+; coefficients skipped) in edx. When currSlice->coeff_ctr is negative on entry, the CBP bit is
+; decoded first through currMB->read_and_store_CBP_block_bit and, if it is non-zero, the
+; significance-map routine selected by `context` is run (presumably filling currSlice->coeff -
+; the routines are macro expansions defined outside this procedure).
+; At end of block eax = edx = 0 and currSlice->pos is reset to 0.
+_readRunLevel_CABAC PROC
+ push esi
+ push edi
+STACKOFFSET=8
+ mov edi, DWORD PTR _currMB$[esp+STACKOFFSET] ; edi: currMB
+ mov esi, DWORD PTR [edi] ; esi: currSlice = currMB->p_Slice;
+
+ cmp DWORD PTR [esi+coeff_ctr@Slice], 0 ; if (currSlice->coeff_ctr >= 0)
+ jge SHORT SET_RUN_AND_LEVEL ; a block is already being read out: nothing to decode
+
+; ===== decode CBP-BIT =====
+ mov eax, DWORD PTR [edi+read_and_store_CBP_block_bit@Macroblock] ; eax: currMB->read_and_store_CBP_block_bit
+ push ebx
+STACKOFFSET=STACKOFFSET+4
+ mov ebx, DWORD PTR _context$[esp+STACKOFFSET] ; ebx: context
+ push ebp
+STACKOFFSET=STACKOFFSET+4
+ mov ebp, DWORD PTR _dep_dp$[esp+STACKOFFSET] ; ebp: dep
+ push ebx ; context
+ push ebp ; dep
+ push edi ; currMB
+ call eax ; currMB->read_and_store_CBP_block_bit(currMB, dep_dp, context)
+ add esp, 12 ; caller removes the three arguments
+ mov DWORD PTR [esi+coeff_ctr@Slice], eax ; currSlice->coeff_ctr = return value
+ test eax, eax ; if (currSlice->coeff_ctr == 0)
+ je SHORT SET_RUN_AND_LEVEL_POP
+
+; ===== decode significance coefficients =====
+ mov ecx, DWORD PTR [esi+tex_ctx@Slice] ; ecx: currSlice->tex_ctx
+ lea eax, DWORD PTR [esi+coeff@Slice] ; eax: currSlice->coeff
+ ;push eax ; currSlice->coeff
+ ;push ecx ; tex_ctx
+ ;call _read_significance_map ; read_significance_map(currSlice->tex_ctx, currMB, dep_dp, context, currSlice->coeff);
+ call sigmap_functions[ebx*4] ; register interface at this point: eax coeff, ecx tex_ctx, ebx context, ebp dep, edi currMB, esi currSlice
+SET_RUN_AND_LEVEL_POP:
+ pop ebp
+ pop ebx
+STACKOFFSET=STACKOFFSET-8
+SET_RUN_AND_LEVEL:
+
+; --- set run and level ---
+ xor edx, edx ; edx: 0
+
+ dec DWORD PTR [esi+coeff_ctr@Slice] ; if (currSlice->coeff_ctr--)
+ js SHORT EOB ; counter was 0: no coefficient left
+
+; --- set run and level (coefficient) ---
+ mov ecx, DWORD PTR [esi+pos@Slice] ; ecx: currSlice->pos
+ xor edi, edi ; edi: run=0
+ cmp WORD PTR [esi+ecx*2+coeff@Slice], dx ; currSlice->coeff[currSlice->pos] == 0
+ jne SHORT LOOP_END
+LOOP_ITR:
+ ; coeff entries are 16 bits wide (indexed with ecx*2), so the next entry is 2 bytes further on.
+ ; A displacement of 1 would test a misaligned word and miss values whose low byte is zero.
+ cmp WORD PTR [esi+ecx*2+2+coeff@Slice], dx ; currSlice->coeff[currSlice->pos + 1] == 0
+ lea ecx, [ecx+1] ; ++currSlice->pos (lea keeps the flags from the cmp)
+ lea edi, [edi+1] ; ++run
+ je SHORT LOOP_ITR
+LOOP_END:
+ movsx eax, WORD PTR [esi+ecx*2+coeff@Slice] ; eax: value = currSlice->coeff[currSlice->pos]
+ inc ecx ; currSlice->pos++
+
+; --- decrement coefficient counter and re-set position ---
+
+ ;cmp DWORD PTR [esi+coeff_ctr@Slice], edx ; if (currSlice->coeff_ctr == 0)
+ ;cmove ecx, edx ; currSlice->pos = 0
+ mov edx, edi ; edx: run
+ pop edi
+ mov DWORD PTR [esi+pos@Slice], ecx ; store currSlice->pos
+ pop esi
+ ret 0 ; eax contains value
+ align 16
+EOB:
+ xor eax, eax ; return 0
+ mov DWORD PTR [esi+pos@Slice], edx ; currSlice->pos = 0;
+ pop edi
+ pop esi
+ ret 0
+_readRunLevel_CABAC ENDP
+_TEXT ENDS
+
+;
+; edi is not saved
+; pass dep_dp in edx, retained on exit
+; pass ctx in edi
+; return value in esi
+
+PUBLIC _unary_exp_golomb_mv_decode3
+_TEXT SEGMENT
+_ctx$ = 4 ; second parameter (not referenced in this procedure; ctx arrives in edi)
+_unary_exp_golomb_mv_decode3 PROC
+STACKOFFSET=0
+ mov eax, edi
+ call _biari_decode_symbol_map ; pass dep in edx and ctx in eax. edx retains dep on exit
+ test eax, eax ; if (symbol)
+ jne SHORT SYMBOL_NOT_ZERO
+ xor esi, esi ; return 0
+ ret 0
+ align 16
+SYMBOL_NOT_ZERO:
+ push ebp
+STACKOFFSET=STACKOFFSET+4
+ mov ebp, 3 ; ebp: max_bin
+ add edi, 4 ; ctx++
+ mov esi, 1 ; esi: symbol
+LOOP_START:
+ mov eax, edi
+ call _biari_decode_symbol_map ; pass dep in edx and ctx in eax. edx retains dep on exit
+ test eax, eax
+ je SHORT SYMBOL_ZERO_RETURN ; l == 0: the current symbol is the result
+
+ inc esi
+ cmp esi, 2 ; if (symbol == 2)
+ sete al ; eax will be 1, so this is safe to do
+ lea edi, [edi + eax*4] ; ctx += (symbol == 2)
+
+ cmp esi, ebp ; if (symbol == max_bin)
+ sete al ; eax will have nothing set high, so this is safe to do
+ lea edi, [edi + eax*4] ; ctx += (symbol == max_bin)
+
+ cmp esi, 8 ; if (symbol < exp_start)
+ jb SHORT LOOP_START
+
+; return exp_start + exp_golomb_decode_eq_prob(dep_dp,3);
+ xor ebp, ebp ; ebp: symbol
+ mov edi, ebp ; edi: binary_symbol
+DECODE_EQ3@LOOP1:
+ call _biari_decode_symbol_eq_prob_asm ; edx holds dep_dp; esi is 8 on first entry, so it doubles as the bit weight (1 << k) with k = 3
+ js SHORT DECODE_EQ3@LOOP2 ; SF set means the decoded bit was 0
+ or ebp, esi; symbol += (l<<k)
+ shl esi, 1 ; k <<= 1
+ jmp SHORT DECODE_EQ3@LOOP1
+ align 16
+DECODE_EQ3@LOOP2:
+ shr esi, 1 ; k-- (the weight halves)
+ jz SHORT DECODE_EQ3@RETURN ; weight reached 0: every suffix bit has been read
+ call _biari_decode_symbol_eq_prob_asm
+ js SHORT DECODE_EQ3@LOOP2 ; bit was 0: nothing to set
+ or edi, esi ; binary_symbol |= (1<<k);
+ jmp SHORT DECODE_EQ3@LOOP2
+ align 16
+DECODE_EQ3@RETURN:
+ ; return (unsigned int) (symbol + binary_symbol) + 8 (exp_start);
+ lea esi, [edi+ebp+8]
+ pop ebp
+ ret 0
+ align 16
+SYMBOL_ZERO_RETURN:
+ ; return symbol is in esi
+ pop ebp
+ ret 0
+_unary_exp_golomb_mv_decode3 ENDP
+_TEXT ENDS
+
+_TEXT SEGMENT
+_unary_bin_decode1 PROC
+; _ctx$ = eax; returns eax = (symbol + 2) >> 1 with CF = low bit of (symbol + 2), where symbol is the unary count (0 if the first bin is 0)
+; _dep_dp$ = edx; edi is saved and restored, esi is overwritten without being saved on the non-zero path
+ push edi
+ mov edi, eax
+ call _biari_decode_symbol_map ; biari_decode_symbol(dep_dp, ctx );
+ test eax, eax ; if (symbol)
+ jne SHORT $LN5@unary_bin_@2
+ mov eax, 2 ; symbol == 0: same (symbol + 2) >> 1 computation as below
+ shr eax, 1 ; eax = 1, CF = 0
+ pop edi
+ ret 0
+align 16
+$LN5@unary_bin_@2:
+ xor esi, esi ; symbol = 0;
+$LL3@unary_bin_@2:
+ inc esi ; ++symbol;
+ lea eax, DWORD PTR [edi+4] ; ctx + ctx_offset
+ call _biari_decode_symbol_map ; biari_decode_symbol(dep_dp, ctx);
+ test eax, eax ; while( l != 0 );
+ jne SHORT $LL3@unary_bin_@2
+ lea eax, [esi + 2]; eax = symbol+2
+ shr eax, 1 ; return (symbol+2) >> 1; the bit shifted out stays in CF for the caller
+ pop edi
+ ret 0
+_unary_bin_decode1 ENDP
+_TEXT ENDS
+
+
+PUBLIC _readDquant_CABAC
+_TEXT SEGMENT
+_currSlice$ = 4 ; first parameter
+_dep_dp$ = 8 ; second parameter
+_readDquant_CABAC PROC
+STACKOFFSET=0
+; 815 : MotionInfoContexts *ctx = currSlice->mot_ctx;
+; 816 : short dquant;
+; 817 : int act_ctx = ((currSlice->last_dquant != 0) ? 1 : 0);
+; 818 : int act_sym = biari_decode_symbol(dep_dp,ctx->delta_qp_contexts + act_ctx );
+
+ mov edx, DWORD PTR _dep_dp$[esp+STACKOFFSET] ; edx: dep_dp
+ push esi
+ push edi
+STACKOFFSET = STACKOFFSET + 8
+ mov edi, DWORD PTR _currSlice$[esp+STACKOFFSET] ; edi: currSlice
+ mov esi, DWORD PTR [edi+mot_ctx@Slice] ; esi: ctx = currSlice->mot_ctx
+ xor eax, eax
+ cmp DWORD PTR [edi+last_dquant@Slice], eax
+ setne al ; eax: act_ctx = (currSlice->last_dquant != 0)
+ lea eax, DWORD PTR [esi+eax*4+332] ; eax: ctx->delta_qp_contexts + act_ctx
+ ; pass dep in edx and ctx in eax. edx retains dep on exit
+ call _biari_decode_symbol_map
+
+ test eax, eax ; if (!act_sym)
+ jz SHORT $LN2@readDquant ; dquant = 0 (eax is already zero)
+
+ lea eax, DWORD PTR [esi+340] ; unary_bin_decode(dep_dp,ctx->delta_qp_contexts + 2,1);
+ call _unary_bin_decode1 ; returns the magnitude in eax and the sign selector in CF; overwrites esi
+
+ jnc SHORT $LN2@readDquant ; lsb is signed bit (CF clear: keep dquant positive)
+
+ neg eax ; dquant = -dquant;
+ movzx eax, ax ; keep only the 16-bit result in eax
+$LN2@readDquant:
+ movsx edx, ax ; sign-extend the 16-bit dquant for the store below
+ mov DWORD PTR [edi+last_dquant@Slice], edx ; currSlice->last_dquant = dquant;
+ pop edi
+ pop esi
+ ;mov ax, cx ; return dquant;
+ ret 0 ; dquant is returned in ax
+_readDquant_CABAC ENDP
+_TEXT ENDS
+
+PUBLIC _readIntraPredMode_CABAC
+_TEXT SEGMENT
+_currSlice$ = 4 ; first parameter
+_dep_dp$ = 8 ; second parameter
+_readIntraPredMode_CABAC PROC
+; 720 : TextureInfoContexts *ctx =
+STACKOFFSET=0
+ mov eax, DWORD PTR _currSlice$[esp + STACKOFFSET]
+ push esi
+ mov esi, DWORD PTR [eax+100] ; currSlice->tex_ctx; (literal offset 100; other procedures here use tex_ctx@Slice - presumably the same value, confirm)
+STACKOFFSET=4
+; 721 : int act_sym;
+; 722 :
+; 723 : // use_most_probable_mode
+; 724 : act_sym = biari_decode_symbol(dep_dp, ctx->ipr_contexts);
+
+ mov edx, DWORD PTR _dep_dp$[esp+STACKOFFSET] ; edx: dep_dp
+ lea eax, DWORD PTR [esi+12] ; eax: ctx->ipr_contexts
+ call _biari_decode_symbol_map
+
+ ; remaining_mode_selector
+ test eax, eax ; if (act_sym == 0)
+ jz SHORT $LN2@readIntraP ; ... go read the 3-bit mode
+
+ or eax, -1 ; return -1;
+ pop esi
+ ret 0
+align 16
+$LN2@readIntraP:
+ push ebx
+ add esi, 16 ; 00000010H (esi: ctx->ipr_contexts+1, used for all three bins below)
+ mov eax, esi
+ call _biari_decode_symbol_map
+ mov ebx, eax ; ebx: pred_mode = first bin
+; 735 : pred_mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 1);
+
+ mov eax, esi
+ call _biari_decode_symbol_map
+ lea ebx, [ebx+2*eax] ; bit 1 is still clear, so the add acts as the OR
+; 736 : pred_mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 2);
+
+ mov eax, esi
+ call _biari_decode_symbol_map
+ lea eax, [ebx+4*eax] ; return pred_mode;
+
+ pop ebx
+ pop esi
+ ret 0
+_readIntraPredMode_CABAC ENDP
+_TEXT ENDS
+
+PUBLIC _readMB_skip_flagInfo_CABAC
+_TEXT SEGMENT
+_currMB$ = 4 ; first parameter
+_dep_dp$ = 12 ; size = 4 (only used below as _dep_dp$[esp+8] - see the note there)
+_readMB_skip_flagInfo_CABAC PROC
+
+; 406 : Slice *currSlice = currMB->p_Slice;
+STACKOFFSET=0
+ mov ecx, DWORD PTR _currMB$[esp + STACKOFFSET] ; ecx: currMB
+ push ebp
+
+ xor eax, eax
+ push esi
+ mov esi, DWORD PTR [ecx + p_Slice@Macroblock] ; esi: currSlice
+ cmp DWORD PTR [esi+slice_type@Slice], 1 ; int bframe=(currSlice->slice_type == B_SLICE);
+ push edi
+
+ mov edi, DWORD PTR [esi+mot_ctx@Slice] ; edi: ctx = currSlice->mot_ctx;
+ sete al ; int bframe=(currSlice->slice_type == B_SLICE); (push/mov kept the flags from the cmp)
+
+; 409 : int a = (currMB->mb_left != NULL) ? (currMB->mb_left->skip_flag == 0) : 0;
+
+ xor edx, edx ; edx: a = 0
+ mov ebp, eax ; ebp: bframe
+ mov eax, DWORD PTR [ecx+104] ; presumably currMB->mb_left, per the quoted line 409
+ test eax, eax
+ je SHORT READ_B
+ cmp DWORD PTR [eax+348], edx ; presumably mb_left->skip_flag == 0
+ sete dl
+
+; 410 : int b = (currMB->mb_up != NULL) ? (currMB->mb_up ->skip_flag == 0) : 0;
+
+READ_B:
+ mov ecx, DWORD PTR [ecx+100] ; presumably currMB->mb_up, per the quoted line 410
+ xor eax, eax ; eax: b = 0
+ test ecx, ecx
+ je SHORT $LN9@readMB_ski
+ cmp DWORD PTR [ecx+348], eax ; presumably mb_up->skip_flag == 0
+ sete al
+$LN9@readMB_ski:
+
+; 414 : if (bframe)
+; 415 : {
+; 416 : act_ctx = 7 + a + b;
+; 418 : skip = biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][act_ctx]);
+
+ add eax, edx ; eax: a + b
+ test ebp, ebp ; bframe?
+ mov edx, DWORD PTR _dep_dp$[esp+8] ; edx: dep_dp. NOTE(review): with three registers pushed this is [esp+20], i.e. the second stack argument; the 12/8 split differs from the STACKOFFSET convention used elsewhere in this file
+ je SHORT $LN3@readMB_ski
+ lea eax, DWORD PTR [edi+eax*4+116]
+ jmp SHORT $LN11@readMB_ski
+align 16
+$LN3@readMB_ski:
+
+; 422 : act_ctx = a + b;
+; 424 : skip = biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][act_ctx]);
+
+ lea eax, DWORD PTR [edi+eax*4+44]
+$LN11@readMB_ski:
+ call _biari_decode_symbol_map
+
+ test eax, eax ; decoded bin == 0: leave last_dquant untouched
+ je SHORT $LN1@readMB_ski
+
+; 429 : currSlice->last_dquant = 0;
+
+ mov DWORD PTR [esi + last_dquant@Slice], 0
+$LN1@readMB_ski:
+ pop edi
+ pop esi
+ pop ebp
+ ret 0 ; eax still holds the decoded bin
+_readMB_skip_flagInfo_CABAC ENDP
+_TEXT ENDS
+
+
+PUBLIC _set_chroma_qp
+_TEXT SEGMENT
+_currMB$ = 4 ; first parameter
+_set_chroma_qp PROC
+ mov eax, DWORD PTR _currMB$[esp] ; eax: currMB
+ mov ecx, DWORD PTR [eax+4] ; ecx: currMB->p_Vid
+ mov edx, DWORD PTR [ecx+bitdepth_chroma_qp_scale@VideoParameters] ; edx: p_Vid->bitdepth_chroma_qp_scale;
+ push edi
+ mov edi, DWORD PTR [ecx+dec_picture@VideoParameters] ; edi: p_Vid->dec_picture
+ mov ecx, DWORD PTR [edi+chroma_qp_offset@StorablePicture] ; ecx: dec_picture->chroma_qp_offset[0]
+ add ecx, DWORD PTR [eax+qp@macroblock] ; ecx: dec_picture->chroma_qp_offset[0] + currMB->qp
+ neg edx ; edx: -p_Vid->bitdepth_chroma_qp_scale;
+ cmp ecx, edx
+ cmovl ecx, edx ; clamp from below to -bitdepth_chroma_qp_scale
+ cmp ecx, 51
+ cmovg ecx, DWORD PTR _51 ; cmov doesn't allow for immediates
+ test ecx, ecx
+ cmovge ecx, DWORD PTR _QP_SCALE_CR[ecx*4] ; table lookup only for non-negative values. NOTE(review): the memory operand of cmov is read even when the condition is false, so a negative ecx reads below the table - confirm this is acceptable
+ mov DWORD PTR [eax+64], ecx ; presumably currMB->qpc[0], per the comment on the next line
+
+ sub ecx, edx; currMB->qpc[0] + p_Vid->bitdepth_chroma_qp_scale;
+ mov DWORD PTR [eax+qp_scaled@Macroblock + 4], ecx ; currMB->qp_scaled[1]
+ mov ecx, DWORD PTR [edi+chroma_qp_offset@StorablePicture + 4] ; ecx: dec_picture->chroma_qp_offset[1]
+ add ecx, DWORD PTR [eax+qp@macroblock] ; ecx: dec_picture->chroma_qp_offset[1] + currMB->qp
+ cmp ecx, edx
+ cmovl ecx, edx ; clamp from below to -bitdepth_chroma_qp_scale
+ cmp ecx, 51
+ cmovg ecx, DWORD PTR _51 ; cmov doesn't allow for immediates
+ test ecx, ecx
+ cmovge ecx, DWORD PTR _QP_SCALE_CR[ecx*4] ; same lookup as for the first component
+ mov DWORD PTR [eax+64+4], ecx ; presumably currMB->qpc[1]
+ sub ecx, edx ; add bitdepth_chroma_qp_scale back (edx holds its negation)
+ pop edi
+ mov DWORD PTR [eax+72 + 8], ecx ; presumably currMB->qp_scaled[2] (literal 72 where the first half uses qp_scaled@Macroblock - confirm they match)
+ ret 0
+_set_chroma_qp ENDP
+_TEXT ENDS
+
+PUBLIC _decodeMVD_CABAC
+_TEXT SEGMENT
+_dep_dp$ = 4 ; first parameter
+_mv_ctx$ = 8 ; second parameter
+_act_ctx$ = 12; third parameter
+_err$ = 16 ; 4th parameter
+_decodeMVD_CABAC PROC
+STACKOFFSET = 0
+ mov eax, DWORD PTR _act_ctx$[esp+STACKOFFSET] ; eax: act_ctx
+ push edi
+STACKOFFSET = STACKOFFSET + 4
+ mov edi, DWORD PTR _mv_ctx$[esp+STACKOFFSET] ; edi: mv_ctx
+ lea edi, [edi+eax*4] ; mv_ctx[0][act_ctx]
+ mov eax, DWORD PTR _err$[esp+STACKOFFSET] ; eax: err
+ lea eax, DWORD PTR [edi+eax*4] ; &mv_ctx[0][act_ctx+err]
+ mov edx, DWORD PTR _dep_dp$[esp+STACKOFFSET] ; edx: dep_dp, kept there for all three decoder calls below
+ call _biari_decode_symbol_map ; int act_sym = biari_decode_symbol(dep_dp,&mv_ctx[0][act_ctx+err] );
+
+ test eax, eax ; if (act_sym != 0)
+ je SHORT SYMBOL_ZERO ; return 0 (eax is already zero)
+ push esi
+STACKOFFSET = STACKOFFSET + 4
+ lea edi, [edi + 40] ; mv_ctx[1]+act_ctx
+ call _unary_exp_golomb_mv_decode3 ; act_sym = unary_exp_golomb_mv_decode3(dep_dp,mv_ctx[1]+act_ctx); ctx passed in edi, result in esi
+ inc esi ; ++act_sym;
+ call _biari_decode_symbol_eq_prob_asm ; mv_sign = biari_decode_symbol_eq_prob(dep_dp); SF set means mv_sign == 0
+ js SHORT SKIP_NEGATE; if(mv_sign)
+ neg esi ; act_sym = -act_sym;
+SKIP_NEGATE:
+ mov eax, esi ; return act_sym
+ pop esi
+SYMBOL_ZERO:
+ pop edi
+ ret 0
+_decodeMVD_CABAC ENDP
+_TEXT ENDS
+
+END
+
diff --git a/Src/h264dec/ldecod/src/biaridecod.c b/Src/h264dec/ldecod/src/biaridecod.c
new file mode 100644
index 00000000..8b1d44f3
--- /dev/null
+++ b/Src/h264dec/ldecod/src/biaridecod.c
@@ -0,0 +1,322 @@
+/*!
+ *************************************************************************************
+ * \file biaridecod.c
+ *
+ * \brief
+ * Binary arithmetic decoder routines.
+ *
+ * This modified implementation of the M Coder is based on JVT-U084
+ * with the choice of M_BITS = 16.
+ *
+ * \date
+ * 21. Oct 2000
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Detlev Marpe <marpe@hhi.de>
+ * - Gabi Blaettermann
+ * - Gunnar Marten
+ *************************************************************************************
+ */
+
+#include "global.h"
+#include "memalloc.h"
+#include "biaridecod.h"
+
+
+#define B_BITS 10 // Number of bits to represent the whole coding interval
+#define HALF 0x01FE //(1 << (B_BITS-1)) - 2
+#define QUARTER 0x0100 //(1 << (B_BITS-2))
+
+
+/************************************************************************
+ ************************************************************************
+ init / exit decoder
+ ************************************************************************
+ ************************************************************************/
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Creates a zero-initialised DecodingEnvironment.
+ * \return
+ *    Pointer to the new environment. If the allocation fails,
+ *    no_mem_exit() is called before the (NULL) pointer is returned.
+ ************************************************************************
+ */
+DecodingEnvironmentPtr arideco_create_decoding_environment()
+{
+  DecodingEnvironmentPtr env = calloc(1, sizeof(DecodingEnvironment));
+
+  if (env == NULL)
+  {
+    no_mem_exit("arideco_create_decoding_environment: dep");
+  }
+
+  return env;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Releases a DecodingEnvironment. A NULL pointer is reported through
+ *    error() with code 200 instead of being freed.
+ ***********************************************************************
+ */
+void arideco_delete_decoding_environment(DecodingEnvironmentPtr dep)
+{
+  if (dep != NULL)
+  {
+    free(dep);
+    return;
+  }
+
+  snprintf(errortext, ET_SIZE, "Error freeing dep (NULL pointer)");
+  error (errortext, 200);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Finalizes arithmetic decoding: advances the stream read position
+ *    (*dep->Dcodestrm_len) by one byte.
+ ************************************************************************
+ */
+void arideco_done_decoding(DecodingEnvironmentPtr dep)
+{
+  *dep->Dcodestrm_len += 1;
+#if(TRACE==2)
+  fprintf(p_trace, "done_decoding: %d\n", *dep->Dcodestrm_len);
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reads the byte at the current stream position and advances the
+ *    position by one.
+ * \return
+ *    the byte that was read
+ ************************************************************************
+ */
+unsigned int getbyte(DecodingEnvironmentPtr dep)
+{
+  int at;
+#if(TRACE==2)
+  fprintf(p_trace, "get_byte: %d\n", (*dep->Dcodestrm_len));
+#endif
+  at = *dep->Dcodestrm_len;
+  *dep->Dcodestrm_len = at + 1;
+  return dep->Dcodestrm[at];
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reads two bytes at the current stream position, first byte in the
+ *    high half, and advances the position by two.
+ * \return
+ *    the 16-bit value that was read
+ ************************************************************************
+ */
+
+static unsigned int getword(DecodingEnvironmentPtr dep)
+{
+  const int first = *dep->Dcodestrm_len;
+  unsigned int word;
+
+  *dep->Dcodestrm_len = first + 2;
+
+  word  = (unsigned int) dep->Dcodestrm[first] << 8;
+  word |= dep->Dcodestrm[first + 1];
+  return word;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Attaches the decoding environment to a code buffer and primes it:
+ *    the read position starts at firstbyte, three bytes are loaded into
+ *    Dvalue, DbitsLeft is set to 15 and Drange to HALF.
+ * \param dep          decoding environment to initialize
+ * \param code_buffer  bitstream bytes
+ * \param firstbyte    index of the first byte to read
+ * \param code_len     caller-owned read position; overwritten with
+ *                     firstbyte and advanced as bytes are consumed
+ ************************************************************************
+ */
+void arideco_start_decoding(DecodingEnvironmentPtr dep, unsigned char *code_buffer,
+                            int firstbyte, int *code_len)
+{
+  unsigned int lead;
+
+  dep->Dcodestrm     = code_buffer;
+  dep->Dcodestrm_len = code_len;
+  *code_len          = firstbyte;
+
+  lead = getbyte(dep);
+  // lookahead of 2 bytes: the bitstream buffer must always hold
+  // 2 more bytes than the actual bitstream
+  dep->Dvalue    = (lead << 16) | getword(dep);
+  dep->DbitsLeft = 15;
+  dep->Drange    = HALF;
+
+#if (2==TRACE)
+  fprintf(p_trace, "value: %d firstbyte: %d code_len: %d\n", dep->Dvalue >> dep->DbitsLeft, firstbyte, *code_len);
+#endif
+}
+
+
+
+
+/*!
+************************************************************************
+* \brief
+*    biari_decode_symbol():
+*    Decodes one bin with the given context and updates the context
+*    state (and its MPS when an LPS is seen in state 0).
+* \return
+*    the decoded symbol
+*
+* Compiled only when not targeting 32-bit x86 (_M_IX86) or in _DEBUG
+* builds.
+*
+* Bounds noted by the original author (not verified here):
+*    max rLPS = 240, max state = 63, renorm between 1 and 6,
+*    max bitsleft = 16, max range = (1<<10) ?
+************************************************************************
+*/
+#if !defined(_M_IX86) || defined(_DEBUG)
+unsigned int biari_decode_symbol(DecodingEnvironmentPtr dep, BiContextTypePtr bi_ct )
+{
+  const unsigned int old_state = bi_ct->state;
+  unsigned int symbol   = bi_ct->MPS;
+  unsigned int code     = dep->Dvalue;
+  unsigned int interval = dep->Drange;
+  const unsigned int lps_range = rLPS_table_64x4[(interval>>6)&3][old_state];
+  unsigned int threshold;
+
+  interval -= lps_range;
+  threshold = interval << dep->DbitsLeft;
+
+  if (code < threshold)
+  { // MPS
+    bi_ct->state = AC_next_state_MPS_64[old_state]; // next state
+
+    if (interval >= QUARTER)
+    { // no renormalization needed
+      dep->Drange = interval;
+      return symbol;
+    }
+
+    dep->Drange = interval << 1;
+    dep->DbitsLeft -= 1;
+    if (dep->DbitsLeft > 0)
+      return symbol;
+  }
+  else
+  { // LPS
+    int shift;
+
+    bi_ct->state = AC_next_state_LPS_64[old_state]; // next state
+    code   -= threshold;
+    symbol ^= 0x01;
+    bi_ct->MPS ^= !old_state; // switch meaning of MPS if necessary
+
+    shift = renorm_table_256[lps_range];
+    dep->Drange = (lps_range << shift);
+    dep->DbitsLeft -= shift;
+    if (dep->DbitsLeft > 0)
+    {
+      dep->Dvalue = code;
+      return symbol;
+    }
+  }
+
+  // bit reservoir exhausted: pull in 16 more bits.
+  // lookahead of 2 bytes: the bitstream buffer must always hold
+  // 2 more bytes than the actual bitstream
+  dep->Dvalue = (code << 16) | getword(dep);
+  dep->DbitsLeft += 16;
+
+  return symbol;
+}
+#endif
+/*!
+ ************************************************************************
+ * \brief
+ *    biari_decode_symbol_eq_prob():
+ *    Decodes one bin without a context. Drange is left unchanged.
+ * \return
+ *    the decoded symbol (0 or 1)
+ ************************************************************************
+ */
+unsigned int biari_decode_symbol_eq_prob(DecodingEnvironmentPtr dep)
+{
+  int code = dep->Dvalue;
+  int reduced;
+
+  dep->DbitsLeft -= 1;
+  if (dep->DbitsLeft == 0)
+  {
+    // lookahead of 2 bytes: the bitstream buffer must always hold
+    // 2 more bytes than the actual bitstream
+    code = (code << 16) | getword( dep );
+    dep->DbitsLeft = 16;
+  }
+
+  reduced = code - (dep->Drange << dep->DbitsLeft);
+
+  if (reduced >= 0)
+  {
+    dep->Dvalue = reduced;
+    return 1;
+  }
+
+  dep->Dvalue = code;
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    biari_decode_final():
+ *    Decodes the terminating bin against a range reduced by 2.
+ *    When the result is 1 the environment is left untouched.
+ * \return
+ *    the decoded symbol (0 or 1)
+ ************************************************************************
+ */
+unsigned int biari_decode_final(DecodingEnvironmentPtr dep)
+{
+  const unsigned int shrunk = dep->Drange - 2;
+  int offset = dep->Dvalue;
+
+  offset -= (shrunk << dep->DbitsLeft);
+
+  if (offset >= 0)
+    return 1;
+
+  if (shrunk >= QUARTER)
+  { // no renormalization needed
+    dep->Drange = shrunk;
+    return 0;
+  }
+
+  dep->Drange = (shrunk << 1);
+  if (--(dep->DbitsLeft) <= 0)
+  {
+    // lookahead of 2 bytes: the bitstream buffer must always hold
+    // 2 more bytes than the actual bitstream
+    dep->Dvalue = (dep->Dvalue << 16) | getword( dep );
+    dep->DbitsLeft = 16;
+  }
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initializes a context from a two-entry table row:
+ *    pstate = ((ini[0] * qp) >> 4) + ini[1].
+ *    pstate >= 64 gives MPS = 1 and state = min(126, pstate) - 64;
+ *    otherwise MPS = 0 and state = 63 - max(1, pstate).
+ ************************************************************************
+ */
+void biari_init_context (int qp, BiContextTypePtr ctx, const char* ini)
+{
+  const int pstate = ((ini[0]* qp )>>4) + ini[1];
+  const int mps_is_one = (pstate >= 64);
+
+  if (mps_is_one)
+    ctx->state = (uint16) (imin(126, pstate) - 64);
+  else
+    ctx->state = (uint16) (63 - imax(1, pstate));
+
+  ctx->MPS = mps_is_one ? 1 : 0;
+}
+
diff --git a/Src/h264dec/ldecod/src/block.c b/Src/h264dec/ldecod/src/block.c
new file mode 100644
index 00000000..d048f956
--- /dev/null
+++ b/Src/h264dec/ldecod/src/block.c
@@ -0,0 +1,929 @@
+
+/*!
+ ***********************************************************************
+ * \file
+ * block.c
+ *
+ * \brief
+ * Block functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Inge Lille-Langoy <inge.lille-langoy@telenor.com>
+ * - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se>
+ ***********************************************************************
+ */
+
+#include "contributors.h"
+
+#include "global.h"
+#include "block.h"
+#include "image.h"
+#include "mb_access.h"
+#include "transform.h"
+#include "quant.h"
+#include "memalloc.h"
+#include "optim.h"
+
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    Lossless (qpprime) 4x4 reconstruction: adds the 4x4 block tblock to
+ *    the prediction at (ioff, joff) and stores the sum, clipped with
+ *    iClip1(255, ...), in mb_rec.
+ ****************************************************************************
+ */
+void itrans4x4_ls(const h264_short_block_row_t *tblock,
+                  const h264_imgpel_macroblock_row_t *mb_pred,
+                  h264_imgpel_macroblock_row_t *mb_rec,
+                  int ioff, //!< index to 4x4 block
+                  int joff) //!< index to 4x4 block
+{
+  int row, col;
+
+  for (row = 0; row < BLOCK_SIZE; ++row)
+  {
+    const int y = row + joff;
+
+    for (col = 0; col < BLOCK_SIZE; ++col)
+    {
+      const int x = col + ioff;
+
+      mb_rec[y][x] = (imgpel) iClip1(255/*max_imgpel_value*/, mb_pred[y][x] + tblock[row][col]);
+    }
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Inverse residual DPCM for Intra lossless coding (4x4).
+*    The residual is accumulated down each column (VERT_PRED) or along
+*    each row (HOR_PRED), or taken as is, and then added to the
+*    prediction without clipping.
+************************************************************************
+*/
+void Inv_Residual_trans_4x4(Macroblock *currMB, //!< current macroblock
+                            ColorPlane pl, //!< used color plane
+                            int ioff, //!< index to 4x4 block
+                            int joff) //!< index to 4x4 block
+{
+  Slice *currSlice = currMB->p_Slice;
+  int subblock = cof4_pos_to_subblock[joff>>2][ioff>>2];
+  h264_short_block_row_t *tblock = currSlice->cof4[pl][subblock];
+  h264_short_block_t temp;
+  int row, col;
+
+  if (currMB->ipmode_DPCM == VERT_PRED)
+  {
+    // running sum from the top row downwards
+    for (col = 0; col < 4; ++col)
+    {
+      temp[0][col] = tblock[0][col];
+      for (row = 1; row < 4; ++row)
+        temp[row][col] = tblock[row][col] + temp[row-1][col];
+    }
+  }
+  else if (currMB->ipmode_DPCM == HOR_PRED)
+  {
+    // running sum from the left column rightwards
+    for (row = 0; row < 4; ++row)
+    {
+      temp[row][0] = tblock[row][0];
+      for (col = 1; col < 4; ++col)
+        temp[row][col] = tblock[row][col] + temp[row][col-1];
+    }
+  }
+  else
+  {
+    for (row = 0; row < BLOCK_SIZE; ++row)
+    {
+      for (col = 0; col < BLOCK_SIZE; ++col)
+      {
+        temp[row][col] = tblock[row][col];
+      }
+    }
+  }
+
+  for (row = 0; row < BLOCK_SIZE; ++row)
+  {
+    for (col = 0; col < BLOCK_SIZE; ++col)
+    {
+      currSlice->mb_rec[pl][row+joff][col+ioff] = (imgpel) (temp[row][col] + currSlice->mb_pred[pl][row+joff][col+ioff]);
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Inverse residual DPCM for Intra lossless coding (8x8).
+*    For VERT_PRED / HOR_PRED the residual in mb_rres8 is replaced by its
+*    running sum down each column / along each row; the result is then
+*    added to the prediction without clipping.
+*
+* \par Input:
+*    ioff_x,joff_y: Block position inside a macro block (0,8).
+************************************************************************
+*/
+//For residual DPCM
+void Inv_Residual_trans_8x8(Macroblock *currMB, ColorPlane pl, int ioff,int joff)
+{
+  Slice *currSlice = currMB->p_Slice;
+  int row, col;
+  h264_short_8x8block_t temp;
+
+  int block = (joff>>2) + (ioff>>3);
+
+  if (currMB->ipmode_DPCM == VERT_PRED)
+  {
+    // accumulate into temp first so the sums keep temp's element type
+    for (col = 0; col < 8; ++col)
+    {
+      temp[0][col] = currSlice->mb_rres8[pl][block][0][col];
+      for (row = 1; row < 8; ++row)
+        temp[row][col] = currSlice->mb_rres8[pl][block][row][col] + temp[row-1][col];
+    }
+    for (col = 0; col < 8; ++col)
+    {
+      for (row = 0; row < 8; ++row)
+        currSlice->mb_rres8[pl][block][row][col] = temp[row][col];
+    }
+  }
+  else if (currMB->ipmode_DPCM == HOR_PRED)
+  {
+    for (row = 0; row < 8; ++row)
+    {
+      temp[row][0] = currSlice->mb_rres8[pl][block][row][0];
+      for (col = 1; col < 8; ++col)
+        temp[row][col] = currSlice->mb_rres8[pl][block][row][col] + temp[row][col-1];
+    }
+    for (row = 0; row < 8; ++row)
+    {
+      for (col = 0; col < 8; ++col)
+        currSlice->mb_rres8[pl][block][row][col] = temp[row][col];
+    }
+  }
+
+  for (row = 0; row < BLOCK_SIZE_8x8; ++row)
+  {
+    for (col = 0; col < BLOCK_SIZE_8x8; ++col)
+    {
+      currSlice->mb_rec[pl][joff+row][ioff+col] = (imgpel) (currSlice->mb_rres8[pl][block][row][col] + currSlice->mb_pred[pl][joff+row][ioff+col]);
+    }
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Luma DC inverse transform: gathers the [0][0] coefficient of the
+ *    16 4x4 blocks into a 4x4 matrix, applies ihadamard4x4(), then
+ *    writes each value back scaled by the intra level scale and
+ *    shifted (<< qp_per, rounded >> 6).
+ ***********************************************************************
+ */
+void itrans_2(Macroblock *currMB, ColorPlane pl)
+{
+  // index of the 4x4 block whose DC coefficient sits at M4[row][col]
+  static const int dc_block[4][4] =
+  {
+    {  0,  1,  4,  5 },
+    {  2,  3,  6,  7 },
+    {  8,  9, 12, 13 },
+    { 10, 11, 14, 15 }
+  };
+
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  int transform_pl = IS_INDEPENDENT(p_Vid) ? PLANE_Y /*p_Vid->colour_plane_id*/ : pl;
+  h264_short_block_t *blocks = currSlice->cof4[transform_pl];
+  int qp_scaled = currMB->qp_scaled[pl];
+
+  int qp_per = p_Vid->qp_per_matrix[ qp_scaled ];
+  int qp_rem = p_Vid->qp_rem_matrix[ qp_scaled ];
+
+  int invLevelScale = currSlice->InvLevelScale4x4_Intra[pl][qp_rem][0][0];
+  h264_int_block_t M4;
+  int row, col;
+
+  // gather
+  for (row = 0; row < 4; ++row)
+  {
+    for (col = 0; col < 4; ++col)
+      M4[row][col] = blocks[dc_block[row][col]][0][0];
+  }
+
+  ihadamard4x4(M4);
+
+  // scale and scatter
+  for (row = 0; row < 4; ++row)
+  {
+    for (col = 0; col < 4; ++col)
+      blocks[dc_block[row][col]][0][0] = rshift_rnd((( M4[row][col] * invLevelScale) << qp_per), 6);
+  }
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Inverse transform of one 4x4 block on the "smb" path.
+ *
+ *    The prediction samples are forward transformed, combined with the
+ *    coefficients in tblock in the transform domain, re-quantised and
+ *    de-quantised with the parameters derived from currSlice->qs, and
+ *    the block is finally reconstructed by opt_itrans4x4().
+ *
+ * \param tblock
+ *    4x4 coefficients (already dequantised by the caller); overwritten
+ * \param mb_pred
+ *    prediction rows of the macroblock
+ * \param currMB
+ *    macroblock being decoded
+ * \param pl
+ *    colour plane; selects currSlice->mb_rec[pl] as output
+ * \param ioff
+ *    column offset of the block inside the macroblock
+ * \param joff
+ *    row offset of the block inside the macroblock
+ ***********************************************************************
+ */
+void itrans_sp(h264_short_block_row_t *tblock, const h264_imgpel_macroblock_row_t *mb_pred, Macroblock *currMB, ColorPlane pl, int ioff, int joff)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+
+  int qp = (currSlice->slice_type == SI_SLICE) ? currSlice->qs : p_Vid->qp;
+  int shift = p_Vid->qp_per_matrix[ qp ];
+  int shift_sp = p_Vid->qp_per_matrix[ currSlice->qs ];
+  int rem_sp = p_Vid->qp_rem_matrix[ currSlice->qs ];
+  int qbits_sp = Q_BITS + shift_sp;
+
+  const int (*scale) [4] = dequant_coef[p_Vid->qp_rem_matrix[ qp ]];
+  const int (*scale_sp)[4] = dequant_coef[rem_sp];
+
+  int **pred_coef;
+  int quantise_pred_first;
+  int row, col;
+
+  get_mem2Dint(&pred_coef, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
+
+  // fetch the prediction of this 4x4 block and transform it in place
+  for (row = 0; row < BLOCK_SIZE; ++row)
+  {
+    for (col = 0; col < BLOCK_SIZE; ++col)
+    {
+      pred_coef[row][col] = mb_pred[row + joff][col + ioff];
+    }
+  }
+
+  forward4x4(pred_coef, pred_coef, 0, 0);
+
+  quantise_pred_first = (p_Vid->sp_switch || currSlice->slice_type == SI_SLICE);
+
+  for (row = 0; row < BLOCK_SIZE; ++row)
+  {
+    for (col = 0; col < BLOCK_SIZE; ++col)
+    {
+      int pred = pred_coef[row][col];
+      // recovering coefficient since they are already dequantized earlier
+      int resid = (tblock[row][col] >> shift) / scale[row][col];
+      int level;
+
+      if (quantise_pred_first)
+      {
+        // quantise the transformed prediction, then add the residual level
+        level = rshift_rnd_sf(iabs(pred) * quant_coef[rem_sp][row][col], qbits_sp);
+        level = isignab(level, pred) + resid;
+      }
+      else
+      {
+        // add the rescaled residual to the prediction, then quantise the sum
+        level = pred + (((resid * scale[row][col] * A[row][col]) << shift) >> 6);
+        level = isign(level) * rshift_rnd_sf(iabs(level) * quant_coef[rem_sp][row][col], qbits_sp);
+      }
+
+      tblock[row][col] = (level * scale_sp[row][col]) << shift_sp;
+    }
+  }
+
+  opt_itrans4x4(tblock, mb_pred, currSlice->mb_rec[pl], ioff, joff);
+
+  free_mem2Dint(pred_coef);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Chroma counterpart of itrans_sp(): rebuilds the coefficients of one
+ *    chroma plane in the transform domain.
+ *
+ *    The chroma prediction is moved into a temporary block (and zeroed in
+ *    currSlice->mb_pred), forward transformed in 4x4 tiles, merged with
+ *    the coefficients in currSlice->cof4[uv + 1] and re-quantised /
+ *    de-quantised with the qs-derived parameters. The four DC values are
+ *    handled separately through a 2x2 transform (mp1).
+ *
+ * \param currMB
+ *    macroblock being decoded
+ * \param uv
+ *    chroma component index (0 or 1); plane uv + 1 is processed
+ ***********************************************************************
+ */
+void itrans_sp_cr(Macroblock *currMB, int uv)
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int i,j,ilev, icof, n2,n1;
+ int mp1[BLOCK_SIZE];
+ int qp_per,qp_rem;
+ int qp_per_sp,qp_rem_sp,q_bits_sp;
+ int **PBlock;
+
+ get_mem2Dint(&PBlock, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
+
+
+ // NOTE(review): a negative qp/qs is used directly as an index below; presumably
+ // the qp_per/qp_rem tables are offset to allow that - confirm.
+ qp_per = p_Vid->qp_per_matrix[ ((p_Vid->qp < 0 ? p_Vid->qp : QP_SCALE_CR[p_Vid->qp]))];
+ qp_rem = p_Vid->qp_rem_matrix[ ((p_Vid->qp < 0 ? p_Vid->qp : QP_SCALE_CR[p_Vid->qp]))];
+
+ qp_per_sp = p_Vid->qp_per_matrix[ ((currSlice->qs < 0 ? currSlice->qs : QP_SCALE_CR[currSlice->qs]))];
+ qp_rem_sp = p_Vid->qp_rem_matrix[ ((currSlice->qs < 0 ? currSlice->qs : QP_SCALE_CR[currSlice->qs]))];
+ q_bits_sp = Q_BITS + qp_per_sp;
+
+ // SI slices use the qs-derived parameters for both stages
+ if (currSlice->slice_type == SI_SLICE)
+ {
+ qp_per = qp_per_sp;
+ qp_rem = qp_rem_sp;
+ }
+
+ // move the prediction into PBlock and clear it in mb_pred
+ for (j=0; j < p_Vid->mb_cr_size_y; ++j)
+ {
+ for (i=0; i < p_Vid->mb_cr_size_x; ++i)
+ {
+ PBlock[j][i] = currSlice->mb_pred[uv + 1][j][i];
+ currSlice->mb_pred[uv + 1][j][i] = 0;
+ }
+ }
+
+ // forward transform every 4x4 tile of the prediction in place
+ for (n2=0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE)
+ {
+ for (n1=0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE)
+ {
+ forward4x4(PBlock, PBlock, n2, n1);
+ }
+ }
+
+ // 2X2 transform of DC coeffs.
+ mp1[0] = (PBlock[0][0] + PBlock[4][0] + PBlock[0][4] + PBlock[4][4]);
+ mp1[1] = (PBlock[0][0] - PBlock[4][0] + PBlock[0][4] - PBlock[4][4]);
+ mp1[2] = (PBlock[0][0] + PBlock[4][0] - PBlock[0][4] - PBlock[4][4]);
+ mp1[3] = (PBlock[0][0] - PBlock[4][0] - PBlock[0][4] + PBlock[4][4]);
+
+ if (p_Vid->sp_switch || currSlice->slice_type == SI_SLICE)
+ {
+ // DC values: quantise the prediction, add the level from cof4, dequantise
+ for (n2=0; n2 < 2; ++n2 )
+ {
+ for (n1=0; n1 < 2; ++n1 )
+ {
+ //quantization of predicted block
+ ilev = rshift_rnd_sf(iabs (mp1[n1+n2*2]) * quant_coef[qp_rem_sp][0][0], q_bits_sp + 1);
+ //addition
+ ilev = isignab(ilev, mp1[n1+n2*2]) + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2][n1]][0][0];
+ //dequantization
+ mp1[n1+n2*2] =ilev * dequant_coef[qp_rem_sp][0][0] << qp_per_sp;
+ }
+ }
+
+ // all positions of each tile; [0][0] is overwritten again after the if/else
+ // for the four tiles stored at cof4 indices 0..3
+ for (n2 = 0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE)
+ {
+ for (n1 = 0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE)
+ {
+ for (j = 0; j < BLOCK_SIZE; ++j)
+ {
+ for (i = 0; i < BLOCK_SIZE; ++i)
+ {
+ // recovering coefficient since they are already dequantized earlier
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = (currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] >> qp_per) / dequant_coef[qp_rem][j][i];
+
+ //quantization of the predicted block
+ ilev = rshift_rnd_sf(iabs(PBlock[n2 + j][n1 + i]) * quant_coef[qp_rem_sp][j][i], q_bits_sp);
+ //addition of the residual
+ ilev = isignab(ilev,PBlock[n2 + j][n1 + i]) + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] ;
+ // Inverse quantization
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = ilev * dequant_coef[qp_rem_sp][j][i] << qp_per_sp;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // DC values: add the rescaled level to the prediction, then quantise and
+ // dequantise the sum
+ for (n2=0; n2 < 2; ++n2 )
+ {
+ for (n1=0; n1 < 2; ++n1 )
+ {
+ ilev = mp1[n1+n2*2] + (((currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2][n1]][0][0] * dequant_coef[qp_rem][0][0] * A[0][0]) << qp_per) >> 5);
+ ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][0][0], q_bits_sp + 1);
+ //ilev = isignab(rshift_rnd_sf(iabs(ilev)* quant_coef[qp_rem_sp][0][0], q_bits_sp + 1), ilev);
+ mp1[n1+n2*2] = ilev * dequant_coef[qp_rem_sp][0][0] << qp_per_sp;
+ }
+ }
+
+ for (n2 = 0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE)
+ {
+ for (n1 = 0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE)
+ {
+ for (j = 0; j< BLOCK_SIZE; ++j)
+ {
+ for (i = 0; i< BLOCK_SIZE; ++i)
+ {
+ // recovering coefficient since they are already dequantized earlier
+ icof = (currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] >> qp_per) / dequant_coef[qp_rem][j][i];
+ //dequantization and addition of the predicted block
+ ilev = PBlock[n2 + j][n1 + i] + ((icof * dequant_coef[qp_rem][j][i] * A[j][i] << qp_per) >> 6);
+ //quantization and dequantization
+ ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][j][i], q_bits_sp);
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = ilev * dequant_coef[qp_rem_sp][j][i] << qp_per_sp;
+ }
+ }
+ }
+ }
+ }
+
+ // 2x2 butterfly of the four DC values, halved, stored as the DC of blocks 0..3
+ currSlice->cof4[uv + 1][0][0][0] = (mp1[0] + mp1[1] + mp1[2] + mp1[3]) >> 1;
+ currSlice->cof4[uv + 1][1][0][0] = (mp1[0] + mp1[1] - mp1[2] - mp1[3]) >> 1;
+ currSlice->cof4[uv + 1][2][0][0] = (mp1[0] - mp1[1] + mp1[2] - mp1[3]) >> 1;
+ currSlice->cof4[uv + 1][3][0][0] = (mp1[0] - mp1[1] - mp1[2] + mp1[3]) >> 1;
+
+ free_mem2Dint(PBlock);
+}
+
+#if defined(_DEBUG) || !defined(_M_IX86)
+/*!
+ ***********************************************************************
+ * \brief
+ *    Inverse transforms all sixteen 4x4 blocks of one plane of a
+ *    macroblock and copies the reconstructed 16x16 area into the
+ *    decoded picture.
+ *
+ * \param currMB
+ *    macroblock being decoded
+ * \param pl
+ *    colour plane; 0 writes to imgY, otherwise to imgUV[pl - 1]
+ * \param smb
+ *    non-zero selects itrans_sp(); otherwise is_lossless chooses between
+ *    Inv_Residual_trans_4x4() and opt_itrans4x4()
+ ***********************************************************************
+ */
+void iMBtrans4x4(Macroblock *currMB, ColorPlane pl, int smb)
+{
+  Slice *currSlice = currMB->p_Slice;
+  StorablePicture *picture = currMB->p_Vid->dec_picture;
+  VideoImage *target = pl ? picture->imgUV[pl - 1]: picture->imgY;
+  int k;
+
+  // Block k sits at column ((k&1)<<2 | (k&4)<<1) and row ((k&2)<<1 | (k&8)),
+  // i.e. (0,0) (4,0) (0,4) (4,4) (8,0) (12,0) (8,4) (12,4) (0,8) ... (12,12).
+  if (smb)
+  {
+    h264_short_block_t *blocks = currSlice->cof4[pl];
+    const h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[pl];
+
+    for (k = 0; k < 16; ++k)
+    {
+      int ioff = ((k & 1) << 2) | ((k & 4) << 1);
+      int joff = ((k & 2) << 1) | (k & 8);
+      itrans_sp(blocks[k], mb_pred, currMB, pl, ioff, joff);
+    }
+  }
+  else if (currMB->is_lossless)
+  {
+    for (k = 0; k < 16; ++k)
+    {
+      int ioff = ((k & 1) << 2) | ((k & 4) << 1);
+      int joff = ((k & 2) << 1) | (k & 8);
+      Inv_Residual_trans_4x4(currMB, pl, ioff, joff);
+    }
+  }
+  else
+  {
+    const h264_short_block_t *blocks = currSlice->cof4[pl];
+    const h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[pl];
+    h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+
+    for (k = 0; k < 16; ++k)
+    {
+      int ioff = ((k & 1) << 2) | ((k & 4) << 1);
+      int joff = ((k & 2) << 1) | (k & 8);
+      opt_itrans4x4(blocks[k], mb_pred, mb_rec, ioff, joff);
+    }
+  }
+
+  // construct picture from 4x4 blocks
+  opt_copy_image_data_16x16_stride(target, currMB->pix_x, currMB->pix_y, currSlice->mb_rec[pl]);
+}
+#endif
+/*!
+ ***********************************************************************
+ * \brief
+ *    Inverse transforms the four 8x8 blocks of one plane of a
+ *    macroblock and copies the reconstructed 16x16 area into the
+ *    decoded picture.
+ *
+ * \param currMB
+ *    macroblock being decoded; is_lossless selects the lossless variant
+ * \param pl
+ *    colour plane; 0 writes to imgY, otherwise to imgUV[pl - 1]
+ ***********************************************************************
+ */
+void iMBtrans8x8(Macroblock *currMB, ColorPlane pl)
+{
+  Slice *currSlice = currMB->p_Slice;
+  StorablePicture *picture = currMB->p_Vid->dec_picture;
+  VideoImage *target = pl ? picture->imgUV[pl - 1] : picture->imgY;
+
+  h264_imgpel_macroblock_row_t *rec = currSlice->mb_rec[pl];
+  h264_imgpel_macroblock_row_t *pred = currSlice->mb_pred[pl];
+  h264_short_8x8block_t *resid = currSlice->mb_rres8[pl];
+
+  // rows 8..15 hold the two lower 8x8 blocks
+  h264_imgpel_macroblock_row_t *rec_lower = rec + 8;
+  h264_imgpel_macroblock_row_t *pred_lower = pred + 8;
+
+  if (currMB->is_lossless != FALSE)
+  {
+    itrans8x8_lossless(rec, pred, resid[0], 0);
+    itrans8x8_lossless(rec, pred, resid[1], 8);
+    itrans8x8_lossless(rec_lower, pred_lower, resid[2], 0);
+    itrans8x8_lossless(rec_lower, pred_lower, resid[3], 8);
+  }
+  else
+  {
+    opt_itrans8x8(rec, pred, resid[0], 0);
+    opt_itrans8x8(rec, pred, resid[1], 8);
+    opt_itrans8x8(rec_lower, pred_lower, resid[2], 0);
+    opt_itrans8x8(rec_lower, pred_lower, resid[3], 8);
+  }
+
+  opt_copy_image_data_16x16_stride(target, currMB->pix_x, currMB->pix_y, rec);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Reconstructs one macroblock into the decoded picture.
+ *
+ *    First the 16x16 area of plane pl is produced: by the 4x4 or 8x8
+ *    inverse transform when coded coefficients exist (low four cbp bits)
+ *    or smb is set, otherwise by copying the prediction. Afterwards both
+ *    chroma planes are reconstructed for YUV420 and YUV422 pictures; no
+ *    chroma work is done here for other chroma formats.
+ *
+ * \param currMB
+ *    macroblock being decoded
+ * \param pl
+ *    colour plane of the 16x16 part
+ * \param smb
+ *    non-zero selects the itrans_sp() / itrans_sp_cr() paths
+ ***********************************************************************
+ */
+void iTransform(Macroblock *currMB, ColorPlane pl, int smb)
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+
+ int uv = pl-1;
+
+ if ((currMB->cbp & 15) != 0 || smb)
+ {
+ if(currMB->luma_transform_size_8x8_flag == 0) // 4x4 inverse transform
+ {
+ iMBtrans4x4(currMB, pl, smb);
+ }
+ else // 8x8 inverse transform
+ {
+ iMBtrans8x8(currMB, pl);
+ }
+ }
+ else
+ {
+ // nothing coded for this plane: the prediction is the reconstruction
+ VideoImage *curr_img = pl ? dec_picture->imgUV[uv] : dec_picture->imgY;
+ opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[pl]);
+ }
+// TODO: fix 4x4 lossless
+ if (dec_picture->chroma_format_idc == YUV420)
+ {
+ VideoImage *curUV;
+
+ for(uv=0;uv<2;++uv)
+ {
+ // note: this local pl shadows the function parameter
+ int pl = uv + 1;
+
+ const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];
+
+ // =============== 4x4 itrans ================
+ // -------------------------------------------
+ curUV = dec_picture->imgUV[uv];
+
+ if (!smb && (currMB->cbp>>4))
+ {
+ if (currMB->is_lossless == FALSE)
+ {
+ const h264_short_block_t *blocks = currSlice->cof4[pl];
+ h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+
+ opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
+ opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
+ opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
+ opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
+ copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec);
+ }
+ else
+ { // lossless
+ const h264_short_block_t *blocks = currSlice->cof4[pl];
+ h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+
+ itrans4x4_ls(blocks[0], mb_pred, mb_rec, 0, 0);
+ itrans4x4_ls(blocks[1], mb_pred, mb_rec, 4, 0);
+ itrans4x4_ls(blocks[2], mb_pred, mb_rec, 0, 4);
+ itrans4x4_ls(blocks[3], mb_pred, mb_rec, 4, 4);
+ copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec);
+ }
+ }
+ else if (smb)
+ {
+ const h264_short_block_t *blocks = currSlice->cof4[pl];
+ h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+
+ // itrans_sp_cr() rewrites cof4[pl] and zeroes mb_pred[pl], so the
+ // inverse transforms below add the coefficients to an all-zero prediction
+ itrans_sp_cr(currMB, uv);
+
+ opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
+ opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
+ opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
+ opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
+
+ copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec);
+ }
+ else
+ {
+ // no chroma coefficients: copy the prediction
+ copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_pred);
+ }
+ }
+ }
+ else if (dec_picture->chroma_format_idc == YUV422)
+ {
+ VideoImage *curUV;
+
+ for(uv=0;uv<2;++uv)
+ {
+ // =============== 4x4 itrans ================
+ // -------------------------------------------
+ // note: this local pl shadows the function parameter
+ int pl = uv + 1;
+ const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];
+ curUV = dec_picture->imgUV[uv];
+
+ // the chroma area is 8 wide and 16 high here: eight 4x4 blocks,
+ // taken from cof4 indices 0..3 and 8..11
+ if (!smb && (currMB->cbp>>4))
+ {
+ h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+ const h264_short_block_t *blocks = currSlice->cof4[pl];
+
+ opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
+ opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
+ opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
+ opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
+ opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8);
+ opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8);
+ opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12);
+ opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12);
+
+ copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec, 8, 16);
+ }
+ else if (smb)
+ {
+ const h264_short_block_t *blocks = currSlice->cof4[pl];
+ h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+
+ // NOTE(review): itrans_sp_cr() only builds four DC values (a 2x2
+ // transform); confirm that is intended for the 8x16 chroma area
+ itrans_sp_cr(currMB, uv);
+
+ opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
+ opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
+ opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
+ opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
+ opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8);
+ opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8);
+ opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12);
+ opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12);
+
+ copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec, 8, 16);
+ }
+ else
+ {
+ copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_pred, 8, 16);
+ }
+ }
+ }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copies a 16x16 sample area between two row-pointer images.
+ *
+ * \param imgBuf1
+ *    destination rows; row j is written starting at column dest_x
+ * \param imgBuf2
+ *    source rows; row j is read starting at column src_x
+ * \param dest_x
+ *    first destination column
+ * \param src_x
+ *    first source column
+ *************************************************************************************
+ */
+void copy_image_data_16x16(imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x)
+{
+  const size_t row_bytes = MB_BLOCK_SIZE * sizeof (imgpel);
+  int row = 0;
+
+  while (row < MB_BLOCK_SIZE)
+  {
+    memcpy(imgBuf1[row] + dest_x, imgBuf2[row] + src_x, row_bytes);
+    ++row;
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ * Copy ImgPel Data from one structure to another (16x16), SSE version
+ *
+ * Copies sixteen rows of 16 bytes from source (rows 16 bytes apart) to
+ * destination at (dest_x, dest_y), rows destination->stride bytes apart.
+ * The source is read with movaps, so it must be 16-byte aligned; the
+ * destination is written with movups and may be unaligned.
+ * Rows are loaded and stored out of order so that every address is
+ * expressible as eax + {0,1,2,4}*ecx.
+ *************************************************************************************
+ */
+#ifdef _M_IX86
+void copy_image_data_16x16_stride_sse(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source)
+{
+ ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
+ imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
+ _asm
+ {
+ mov eax, dest
+ mov ecx, destination_stride
+ mov edx, source;
+ // source rows 0, 1, 2 and 4
+ movaps xmm0, 0[edx]
+ movaps xmm1, 16[edx]
+ movaps xmm2, 32[edx]
+ movaps xmm3, 64[edx]
+ movups [eax], xmm0 // dest[0]
+ movups [eax+ecx], xmm1 // dest[1]
+ movups [eax+2*ecx], xmm2 // dest[2]
+ movups [eax+4*ecx], xmm3 // dest[4]
+
+ // source rows 3 and 6
+ movaps xmm0, 48[edx]
+ movaps xmm1, 96[edx]
+ lea eax, [eax+2*ecx] // dest = &dest[2]
+ movups [eax+ecx], xmm0 // dest[3]
+ movups [eax+4*ecx], xmm1 // dest[6]
+
+ // source rows 5 and 8
+ movaps xmm0, 80[edx]
+ movaps xmm1, 128[edx]
+ lea eax, [eax+2*ecx] // dest = &dest[2] (dest[4] from start)
+ movups [eax+ecx], xmm0 // dest[5]
+ movups [eax+4*ecx], xmm1 // dest[8]
+
+ // source rows 7 and 10
+ movaps xmm0, 112[edx]
+ movaps xmm1, 160[edx]
+ lea eax, [eax+2*ecx] // dest = &dest[2] (dest[6] from start)
+ movups [eax+ecx], xmm0 // dest[7]
+ movups [eax+4*ecx], xmm1 // dest[10]
+
+ // source rows 9 and 12
+ movaps xmm0, 144[edx]
+ movaps xmm1, 192[edx]
+ lea eax, [eax+2*ecx] // dest = &dest[2] (dest[8] from start)
+ movups [eax+ecx], xmm0 // dest[9]
+ movups [eax+4*ecx], xmm1 // dest[12]
+
+ // source rows 11 and 14
+ movaps xmm0, 176[edx]
+ movaps xmm1, 224[edx]
+ lea eax, [eax+2*ecx] // dest = &dest[2] (dest[10] from start)
+ movups [eax+ecx], xmm0 // dest[11]
+ movups [eax+4*ecx], xmm1 // dest[14]
+
+ // source rows 13 and 15
+ movaps xmm0, 208[edx]
+ movaps xmm1, 240[edx]
+ lea eax, [eax+ecx] // dest = &dest[1] (dest[11] from start)
+ movups [eax+2*ecx], xmm0 // dest[13]
+ movups [eax+4*ecx], xmm1 // dest[15]
+ }
+}
+#endif
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Plain C copy of a 16x16 macroblock buffer into a strided image.
+ *
+ * \param destination
+ *    target image; rows are destination->stride elements apart
+ * \param dest_x
+ *    column of the top-left target sample
+ * \param dest_y
+ *    row of the top-left target sample
+ * \param source
+ *    macroblock buffer whose rows 0..15 are copied
+ *************************************************************************************
+ */
+void copy_image_data_16x16_stride_c(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source)
+{
+  ptrdiff_t pitch = destination->stride;
+  imgpel *out = destination->base_address + pitch * dest_y + dest_x;
+  int row;
+
+  for (row = 0; row < MB_BLOCK_SIZE; ++row, out += pitch)
+  {
+    memcpy(out, source[row], MB_BLOCK_SIZE * sizeof (imgpel));
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ * Copy ImgPel Data from one structure to another (8x8)
+ *
+ * Copies the 8x8 area starting at (src_x, src_y) of a macroblock buffer
+ * to (dest_x, dest_y) of a strided image.
+ *
+ * The x86 path addresses the source as imgBuf2 + src_y*16 + src_x, i.e. it
+ * relies on source rows being 16 bytes apart, and writes with non-temporal
+ * MMX stores (movntq).
+ * NOTE(review): no emms/sfence is issued in this function - confirm the
+ * callers take care of it.
+ *************************************************************************************
+ */
+void copy_image_data_8x8_stride2(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2, int src_x, int src_y)
+{
+#ifdef _M_IX86
+ ptrdiff_t destination_stride = destination->stride;
+ imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
+ _asm
+ {
+ // eax = address of the first source sample
+ mov eax, src_y
+ shl eax, 4
+ add eax, src_x
+ add eax, imgBuf2
+
+ mov edx, dest
+ mov ecx, destination_stride
+
+ // load the eight source rows into mm0..mm7
+ movq mm0, MMWORD PTR 0[eax]
+ movq mm1, MMWORD PTR 16[eax]
+ movq mm2, MMWORD PTR 32[eax]
+ movq mm3, MMWORD PTR 48[eax]
+ movq mm4, MMWORD PTR 64[eax]
+ movq mm5, MMWORD PTR 80[eax]
+ movq mm6, MMWORD PTR 96[eax]
+ movq mm7, MMWORD PTR 112[eax]
+
+ // rows 0, 1, 2 and 4 relative to dest
+ movntq [edx], mm0
+ movntq [edx+ecx], mm1
+ movntq [edx+2*ecx], mm2
+ movntq [edx+4*ecx], mm4
+ add edx, ecx
+ // edx = row 1: rows 3 and 5
+ movntq 0[edx+2*ecx], mm3
+ movntq 0[edx+4*ecx], mm5
+ add edx, ecx
+ // edx = row 2: row 6
+ movntq 0[edx+4*ecx], mm6
+ add edx, ecx
+ // edx = row 3: row 7
+ movntq 0[edx+4*ecx], mm7
+ }
+#else
+ ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
+ imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
+ int j;
+ for(j = 0; j < BLOCK_SIZE_8x8; ++j)
+ {
+ memcpy(dest, &imgBuf2[src_y+j][src_x], BLOCK_SIZE_8x8 * sizeof (imgpel));
+ dest+=destination_stride;
+ }
+#endif
+
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ * Copy the top-left 8x8 area of a macroblock buffer to (dest_x, dest_y)
+ * of a strided image.
+ *
+ * The x86 path reads source rows 16 bytes apart and writes with
+ * non-temporal MMX stores (movntq).
+ * NOTE(review): no emms/sfence is issued in this function - confirm the
+ * callers take care of it.
+ *************************************************************************************
+ */
+void copy_image_data_8x8_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2)
+{
+#ifdef _M_IX86
+ ptrdiff_t destination_stride = destination->stride;
+ imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
+ _asm
+ {
+ mov eax, imgBuf2
+ mov edx, dest
+ mov ecx, destination_stride
+
+ // load the eight source rows into mm0..mm7
+ movq mm0, MMWORD PTR 0[eax]
+ movq mm1, MMWORD PTR 16[eax]
+ movq mm2, MMWORD PTR 32[eax]
+ movq mm3, MMWORD PTR 48[eax]
+ movq mm4, MMWORD PTR 64[eax]
+ movq mm5, MMWORD PTR 80[eax]
+ movq mm6, MMWORD PTR 96[eax]
+ movq mm7, MMWORD PTR 112[eax]
+
+ // rows 0, 1, 2 and 4 relative to dest
+ movntq [edx], mm0
+ movntq [edx+ecx], mm1
+ movntq [edx+2*ecx], mm2
+ movntq [edx+4*ecx], mm4
+ add edx, ecx
+ // edx = row 1: rows 3 and 5
+ movntq 0[edx+2*ecx], mm3
+ movntq 0[edx+4*ecx], mm5
+ add edx, ecx
+ // edx = row 2: row 6
+ movntq 0[edx+4*ecx], mm6
+ add edx, ecx
+ // edx = row 3: row 7
+ movntq 0[edx+4*ecx], mm7
+ }
+#else
+ ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
+ imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
+ int j;
+ for(j = 0; j < BLOCK_SIZE_8x8; ++j)
+ {
+ memcpy(dest, &imgBuf2[j][0], BLOCK_SIZE_8x8 * sizeof (imgpel));
+ dest+=destination_stride;
+ }
+#endif
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copy ImgPel Data from one structure to another (4x4)
+ *
+ *    Copies the 4x4 area starting at (src_x, src_y) of a macroblock buffer
+ *    to (dest_x, dest_y) of a strided image.
+ *
+ * \param destination
+ *    target image; rows are destination->stride elements apart
+ * \param dest_x
+ *    column of the top-left target sample
+ * \param dest_y
+ *    row of the top-left target sample
+ * \param source
+ *    macroblock buffer that is read (never written)
+ * \param src_x
+ *    first source column
+ * \param src_y
+ *    first source row
+ *************************************************************************************
+ */
+
+void copy_image_data_4x4_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int src_x, int src_y)
+{
+  ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this
+  imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x;
+  int j;
+
+  // index the const source directly (as copy_image_data_8x8_stride2 does)
+  // instead of casting the const qualifier away
+  for(j = 0; j < BLOCK_SIZE; ++j)
+  {
+    memcpy(dest, &source[src_y + j][src_x], BLOCK_SIZE * sizeof (imgpel));
+    dest += destination_stride;
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copies a width x height sample area between two row-pointer images.
+ *
+ * \param imgBuf1
+ *    destination rows; row j is written starting at column dest_x
+ * \param imgBuf2
+ *    source rows; row j is read starting at column src_x
+ * \param dest_x
+ *    first destination column
+ * \param src_x
+ *    first source column
+ * \param width
+ *    number of samples copied per row
+ * \param height
+ *    number of rows copied
+ *************************************************************************************
+ */
+void copy_image_data(imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x, int width, int height)
+{
+  const size_t row_bytes = width * sizeof (imgpel);
+  int row = 0;
+
+  while (row < height)
+  {
+    memcpy(imgBuf1[row] + dest_x, imgBuf2[row] + src_x, row_bytes);
+    ++row;
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copies the top-left width x height area of a macroblock buffer to
+ *    (dest_x, dest_y) of a strided image. With H264_IPP defined the copy
+ *    is delegated to ippiCopy_8u_C1R(), otherwise it is done row by row.
+ *
+ * \param destination
+ *    target image; rows are destination->stride elements apart
+ * \param dest_x
+ *    column of the top-left target sample
+ * \param dest_y
+ *    row of the top-left target sample
+ * \param imgBuf2
+ *    macroblock buffer that is read
+ * \param width
+ *    number of samples copied per row
+ * \param height
+ *    number of rows copied
+ *************************************************************************************
+ */
+void copy_image_data_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2, int width, int height)
+{
+  ptrdiff_t pitch = destination->stride;
+  imgpel *out = destination->base_address + pitch * dest_y + dest_x;
+#ifdef H264_IPP
+  IppiSize roi = {width,height};
+  ippiCopy_8u_C1R(imgBuf2[0], sizeof(imgBuf2[0]), out, pitch, roi);
+#else
+  int row;
+  for (row = 0; row < height; ++row, out += pitch)
+  {
+    memcpy(out, imgBuf2[row], width * sizeof (imgpel));
+  }
+#endif
+}
diff --git a/Src/h264dec/ldecod/src/cabac.c b/Src/h264dec/ldecod/src/cabac.c
new file mode 100644
index 00000000..a3c43513
--- /dev/null
+++ b/Src/h264dec/ldecod/src/cabac.c
@@ -0,0 +1,2123 @@
+/*!
+*************************************************************************************
+* \file cabac.c
+*
+* \brief
+* CABAC entropy coding routines
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Detlev Marpe <marpe@hhi.de>
+**************************************************************************************
+*/
+
+#include "global.h"
+#include "cabac.h"
+#include "memalloc.h"
+#include "elements.h"
+#include "image.h"
+#include "biaridecod.h"
+#include "mb_access.h"
+#include "vlc.h"
+#include <mmintrin.h>
+// get_bit(x, n): yields bit n (0 or 1) of the 64-bit object x.
+// The macro takes the address of x and reads it as an __m64, so x must be an
+// lvalue that is 64 bits wide; the value is shifted right by n with MMX and the
+// lowest bit of the low 32 bits is returned.
+// NOTE(review): unlike or_bits() below, this uses MMX intrinsics without an
+// _M_IX86 guard - confirm that is intended for non-x86 builds.
+#define get_bit(x, n) (_mm_cvtsi64_si32(_mm_srli_si64(*(__m64 *)&(x), n)) & 1)
+/* Plain C form of the same operation, kept for reference:
+static inline int get_bit(int64 x,int n)
+{
+return (int)(((x >> n) & 1));
+}*/
+
+// ORs (mask << position) into the first 32 bits stored at x. That is the low
+// half of the 64-bit value only on a little-endian target, and position must
+// keep the shifted mask within 32 bits.
+static __forceinline void or_bits_low(int64 *x, int mask, int position)
+{
+  int32_t *first_word = (int32_t *)x;
+
+  *first_word |= (mask << position);
+}
+
+// ORs mask, shifted left by position, into the 64-bit value at x. On 32-bit
+// x86 the 64-bit shift and OR are done with MMX, elsewhere with plain int64
+// arithmetic.
+static inline void or_bits(int64 *x, int mask, int position)
+{
+#ifdef _M_IX86
+  __m64 *target = (__m64 *)x;
+  __m64 shifted = _mm_slli_si64(_mm_cvtsi32_si64(mask), position);
+
+  *target = _mm_or_si64(*target, shifted);
+#else
+  int64 shifted = (int64) mask << position;
+
+  *x = *x | shifted;
+#endif
+}
+#if TRACE
+int symbolCount = 0;
+#endif
+
+/***********************************************************************
+* L O C A L L Y D E F I N E D F U N C T I O N P R O T O T Y P E S
+***********************************************************************
+*/
+static unsigned int unary_bin_decode(DecodingEnvironmentPtr dep_dp,
+ BiContextTypePtr ctx,
+ int ctx_offset);
+static unsigned int unary_bin_max_decode(DecodingEnvironmentPtr dep_dp,
+ BiContextTypePtr ctx,
+ int ctx_offset,
+ unsigned int max_symbol);
+
+unsigned int unary_exp_golomb_mv_decode(DecodingEnvironmentPtr dep_dp, BiContextTypePtr ctx, unsigned int max_bin);
+unsigned int unary_exp_golomb_mv_decode3(DecodingEnvironmentPtr dep_dp, BiContextTypePtr ctx);
+
+/*!
+************************************************************************
+* \brief
+*    Sets currMB->mb_left and currMB->mb_up to the neighbouring
+*    macroblocks reported by the luma neighbour lookups, or to NULL
+*    when the respective neighbour is not available.
+************************************************************************
+*/
+void CheckAvailabilityOfNeighborsCABAC(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos above, beside;
+
+  p_Vid->getNeighbourLeftLuma(currMB, &beside);
+  p_Vid->getNeighbourUpLuma(currMB, &above);
+
+  currMB->mb_up = above.available ? &p_Vid->mb_data[above.mb_addr] : NULL;
+  currMB->mb_left = beside.available ? &p_Vid->mb_data[beside.mb_addr] : NULL;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Resets the per-slice CABAC state kept in the slice: clears
+*    last_dquant.
+************************************************************************
+*/
+void cabac_new_slice(Slice *currSlice)
+{
+ currSlice->last_dquant=0;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Allocates a zero-initialised set of context models for the motion
+*    info used for arithmetic decoding.
+*
+* \return
+*    the new MotionInfoContexts; no_mem_exit() is called when the
+*    allocation fails. Release with delete_contexts_MotionInfo().
+************************************************************************
+*/
+MotionInfoContexts* create_contexts_MotionInfo(void)
+{
+  MotionInfoContexts *contexts = (MotionInfoContexts*) calloc(1, sizeof(*contexts));
+
+  if (!contexts)
+  {
+    no_mem_exit("create_contexts_MotionInfo: deco_ctx");
+  }
+
+  return contexts;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Allocates a zero-initialised set of context models for the texture
+*    info used for arithmetic decoding.
+*
+* \return
+*    the new TextureInfoContexts; no_mem_exit() is called when the
+*    allocation fails. Release with delete_contexts_TextureInfo().
+************************************************************************
+*/
+TextureInfoContexts* create_contexts_TextureInfo(void)
+{
+  TextureInfoContexts *contexts = (TextureInfoContexts*) calloc(1, sizeof(*contexts));
+
+  if (!contexts)
+  {
+    no_mem_exit("create_contexts_TextureInfo: deco_ctx");
+  }
+
+  return contexts;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Releases the motion info context models used for arithmetic
+*    decoding. Passing NULL is allowed and does nothing.
+************************************************************************
+*/
+void delete_contexts_MotionInfo(MotionInfoContexts *deco_ctx)
+{
+  // free() ignores a NULL pointer, so no separate check is needed
+  free( deco_ctx );
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Releases the texture info context models used for arithmetic
+*    decoding. Passing NULL is allowed and does nothing.
+************************************************************************
+*/
+void delete_contexts_TextureInfo(TextureInfoContexts *deco_ctx)
+{
+  // free() ignores a NULL pointer, so no separate check is needed
+  free( deco_ctx );
+}
+
+/*!
+************************************************************************
+* \brief
+*    Decodes the field mode flag of a macroblock. The context index is
+*    the sum of the mb_field values of the left and upper neighbours,
+*    each counted only when that neighbour is available.
+************************************************************************
+*/
+Boolean readFieldModeInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  MotionInfoContexts *ctx = currMB->p_Slice->mot_ctx;
+  int act_ctx = 0;
+
+  if (currMB->mb_avail_left)
+  {
+    act_ctx += p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+  }
+  if (currMB->mb_avail_up)
+  {
+    act_ctx += p_Vid->mb_data[currMB->mb_addr_up].mb_field;
+  }
+
+  return biari_decode_symbol (dep_dp, &ctx->mb_aff_contexts[act_ctx]);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Looks ahead at the next macroblock without consuming bitstream:
+*    reads its skip flag and, when it is not skipped, its field mode.
+*
+*    The arithmetic decoder state and the mb_type / mb_aff context
+*    models are saved before the look-ahead and restored afterwards.
+*    When the next macroblock is not skipped, its field mode is stored
+*    in the mb_field of the macroblock preceding it.
+*
+* \param currSlice
+*    slice being decoded
+* \param act_dp
+*    data partition whose CABAC decoding engine is used
+* \return
+*    skip value returned by readMB_skip_flagInfo_CABAC() for the next MB
+************************************************************************
+*/
+int check_next_mb_and_get_field_mode_CABAC(Slice *currSlice, DataPartition *act_dp)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  BiContextTypePtr mb_type_ctx_copy[3];
+  BiContextTypePtr mb_aff_ctx_copy;
+  DecodingEnvironmentPtr dep_dp_copy;
+
+  int length;
+  DecodingEnvironmentPtr dep_dp = &(act_dp->de_cabac);
+
+  int skip = 0;
+  int field = 0;
+  int i;
+
+  Macroblock *currMB;
+
+  //get next MB
+  ++p_Vid->current_mb_nr;
+
+  currMB = &p_Vid->mb_data[p_Vid->current_mb_nr];
+  currMB->p_Vid = p_Vid;
+  currMB->p_Slice = currSlice;
+  currMB->slice_nr = p_Vid->current_slice_nr;
+  currMB->mb_field = p_Vid->mb_data[p_Vid->current_mb_nr-1].mb_field;
+  currMB->mbAddrX = p_Vid->current_mb_nr;
+
+  CheckAvailabilityOfNeighbors(currMB);
+  CheckAvailabilityOfNeighborsCABAC(currMB);
+
+  //create (every allocation is checked before it is written to)
+  dep_dp_copy = (DecodingEnvironmentPtr) calloc(1, sizeof(DecodingEnvironment) );
+  if (dep_dp_copy == NULL)
+    no_mem_exit("check_next_mb_and_get_field_mode_CABAC: dep_dp_copy");
+  for (i=0;i<3;++i)
+  {
+    mb_type_ctx_copy[i] = (BiContextTypePtr) calloc(NUM_MB_TYPE_CTX, sizeof(BiContextType) );
+    if (mb_type_ctx_copy[i] == NULL)
+      no_mem_exit("check_next_mb_and_get_field_mode_CABAC: mb_type_ctx_copy");
+  }
+  mb_aff_ctx_copy = (BiContextTypePtr) calloc(NUM_MB_AFF_CTX, sizeof(BiContextType) );
+  if (mb_aff_ctx_copy == NULL)
+    no_mem_exit("check_next_mb_and_get_field_mode_CABAC: mb_aff_ctx_copy");
+
+  //copy
+  memcpy(dep_dp_copy,dep_dp,sizeof(DecodingEnvironment));
+  // the copied struct holds the same Dcodestrm_len pointer, so the value it
+  // points to is saved separately in length
+  length = *(dep_dp_copy->Dcodestrm_len) = *(dep_dp->Dcodestrm_len);
+  for (i=0;i<3;++i)
+    memcpy(mb_type_ctx_copy[i], currSlice->mot_ctx->mb_type_contexts[i],NUM_MB_TYPE_CTX*sizeof(BiContextType) );
+  memcpy(mb_aff_ctx_copy, currSlice->mot_ctx->mb_aff_contexts,NUM_MB_AFF_CTX*sizeof(BiContextType) );
+
+  //check_next_mb
+  currSlice->last_dquant = 0;
+  skip = readMB_skip_flagInfo_CABAC(currMB, dep_dp);
+
+  if (!skip)
+  {
+    field = readFieldModeInfo_CABAC(currMB, dep_dp);
+    p_Vid->mb_data[p_Vid->current_mb_nr-1].mb_field = field;
+  }
+
+  //reset
+  p_Vid->current_mb_nr--;
+
+  memcpy(dep_dp,dep_dp_copy,sizeof(DecodingEnvironment));
+  *(dep_dp->Dcodestrm_len) = length;
+  for (i=0;i<3;++i)
+    memcpy(currSlice->mot_ctx->mb_type_contexts[i],mb_type_ctx_copy[i], NUM_MB_TYPE_CTX*sizeof(BiContextType) );
+  memcpy( currSlice->mot_ctx->mb_aff_contexts,mb_aff_ctx_copy,NUM_MB_AFF_CTX*sizeof(BiContextType) );
+
+  CheckAvailabilityOfNeighborsCABAC(currMB);
+
+  //delete
+  free(dep_dp_copy);
+  for (i=0;i<3;++i)
+    free(mb_type_ctx_copy[i]);
+  free(mb_aff_ctx_copy);
+
+  return skip;
+}
+
+
+
+
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decodes one signed motion vector difference
+*    component.
+*
+*    The first bin is decoded with context mv_ctx[0][act_ctx + err]; a
+*    zero bin yields 0. Otherwise the magnitude is read with
+*    unary_exp_golomb_mv_decode3() from the contexts starting at
+*    mv_ctx[1][act_ctx], incremented by one, and negated when the
+*    following equiprobable sign bin is set.
+*
+*    In non-debug 32-bit x86 builds only the prototype is emitted here;
+*    the definition is expected to come from elsewhere.
+************************************************************************
+*/
+#if defined(_DEBUG) || !defined(_M_IX86)
+int decodeMVD_CABAC(DecodingEnvironmentPtr dep_dp, BiContextType mv_ctx[2][NUM_MV_RES_CTX], int act_ctx, int err)
+{
+  int magnitude;
+  int first_bin = biari_decode_symbol(dep_dp, &mv_ctx[0][act_ctx + err]);
+
+  if (first_bin == 0)
+  {
+    return first_bin;
+  }
+
+  magnitude = unary_exp_golomb_mv_decode3(dep_dp, &mv_ctx[1][act_ctx]);
+  ++magnitude;
+
+  if (biari_decode_symbol_eq_prob(dep_dp))
+  {
+    return -magnitude;
+  }
+  return magnitude;
+}
+#else
+int decodeMVD_CABAC(DecodingEnvironmentPtr dep_dp, BiContextType mv_ctx[2][NUM_MV_RES_CTX], int act_ctx, int err);
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decode component k of a motion vector difference.
+*    The context offset handed to decodeMVD_CABAC() is derived from the
+*    sum of the absolute mvd values stored for the upper and the left
+*    neighbouring block.
+*
+* \param currMB    macroblock being decoded
+* \param dep_dp    arithmetic decoding environment
+* \param k         mvd component index; k == 1 is rescaled between
+*                  frame and field neighbours when mb_aff_frame_flag is set
+* \param list_idx  first index into the neighbours' mvd arrays
+* \param x         position passed to the left-neighbour lookup
+*                  (x >> 2 is also the mvd column used for the upper block)
+* \param y         position passed to both neighbour lookups
+*
+* \return
+*    the value returned by decodeMVD_CABAC()
+************************************************************************
+*/
+int readMVD_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int k, int list_idx, int x, int y)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+  // only component 1 is rescaled, and only when mb_aff_frame_flag is set
+  const int rescale = (currSlice->mb_aff_frame_flag && (k == 1));
+  int abs_up = 0;
+  int abs_left = 0;
+  int sum;
+  int err;
+  PixelPos up, left;
+
+  // upper neighbour
+  p_Vid->getNeighbourPXLumaNB_NoPos(currMB, y - 1, &up);
+  if (up.available)
+  {
+    Macroblock *nb = &p_Vid->mb_data[up.mb_addr];
+
+    abs_up = abs(nb->mvd[list_idx][up.y >> 2][x >> 2][k]);
+    if (rescale)
+    {
+      if ((currMB->mb_field == 0) && (nb->mb_field == 1))
+        abs_up *= 2;
+      else if ((currMB->mb_field == 1) && (nb->mb_field == 0))
+        abs_up /= 2;
+    }
+  }
+
+  // left neighbour
+  p_Vid->getNeighbourXPLumaNB_NoPos(currMB, x - 1, y, &left);
+  if (left.available)
+  {
+    Macroblock *nb = &p_Vid->mb_data[left.mb_addr];
+
+    abs_left = abs(nb->mvd[list_idx][left.y >> 2][left.x >> 2][k]);
+    if (rescale)
+    {
+      if ((currMB->mb_field == 0) && (nb->mb_field == 1))
+        abs_left *= 2;
+      else if ((currMB->mb_field == 1) && (nb->mb_field == 0))
+        abs_left /= 2;
+    }
+  }
+
+  // map the neighbour sum onto the context offset: <3 -> 0, 3..32 -> 2, >32 -> 3
+  sum = abs_left + abs_up;
+  err = (sum < 3) ? 0 : ((sum > 32) ? 3 : 2);
+
+  return decodeMVD_CABAC(dep_dp, currSlice->mot_ctx->mv_res_contexts, 5 * k, err);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decode the 8x8 block (sub-macroblock) type.
+*    Slices other than B slices use the contexts in b8_type_contexts[0]
+*    and yield 0..3; B slices use b8_type_contexts[1] and yield 0..12.
+*
+* \param currSlice  slice providing slice_type and the motion contexts
+* \param dep_dp     arithmetic decoding environment
+*
+* \return
+*    the decoded 8x8 type symbol
+************************************************************************
+*/
+int readB8_typeInfo_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp)
+{
+  MotionInfoContexts *ctx = currSlice->mot_ctx;
+  BiContextType *bins;
+  int sym;
+
+  if (currSlice->slice_type != B_SLICE)
+  {
+    bins = ctx->b8_type_contexts[0];
+
+    if (biari_decode_symbol(dep_dp, &bins[1]))
+      return 0;
+    if (!biari_decode_symbol(dep_dp, &bins[3]))
+      return 1;
+    return biari_decode_symbol(dep_dp, &bins[4]) ? 2 : 3;
+  }
+
+  bins = ctx->b8_type_contexts[1];
+
+  if (!biari_decode_symbol(dep_dp, &bins[0]))
+    return 0;
+
+  if (!biari_decode_symbol(dep_dp, &bins[1]))
+  {
+    // one further bin: 0 or 1
+    sym = biari_decode_symbol(dep_dp, &bins[3]) ? 1 : 0;
+  }
+  else if (!biari_decode_symbol(dep_dp, &bins[2]))
+  {
+    // two further bins: 2..5
+    sym = 2;
+    if (biari_decode_symbol(dep_dp, &bins[3]))
+      sym += 2;
+    if (biari_decode_symbol(dep_dp, &bins[3]))
+      sym += 1;
+  }
+  else if (!biari_decode_symbol(dep_dp, &bins[3]))
+  {
+    // two further bins: 6..9
+    sym = 6;
+    if (biari_decode_symbol(dep_dp, &bins[3]))
+      sym += 2;
+    if (biari_decode_symbol(dep_dp, &bins[3]))
+      sym += 1;
+  }
+  else
+  {
+    // one further bin: 10 or 11
+    sym = 10;
+    if (biari_decode_symbol(dep_dp, &bins[3]))
+      sym += 1;
+  }
+
+  // every symbol behind a set first bin is shifted up by one
+  return sym + 1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decode the skip flag bin of a macroblock.
+*    The context index is incremented once for each of the left and
+*    upper neighbours that exists and has skip_flag == 0. B slices use
+*    mb_type_contexts[2] starting at index 7, all other slices use
+*    mb_type_contexts[1] starting at index 0.
+*
+* \param currMB  macroblock being decoded
+* \param dep_dp  arithmetic decoding environment
+*
+* \return
+*    the decoded bin; when it is non-zero currSlice->last_dquant is
+*    reset to 0 as a side effect
+*
+* \note
+*    Only compiled when _DEBUG is defined or _M_IX86 is not defined.
+************************************************************************
+*/
+#if defined(_DEBUG) || !defined(_M_IX86)
+int readMB_skip_flagInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp)
+{
+  Slice *currSlice = currMB->p_Slice;
+  MotionInfoContexts *ctx = currSlice->mot_ctx;
+  int ctx_inc = 0;
+  int skip;
+
+  if ((currMB->mb_left != NULL) && (currMB->mb_left->skip_flag == 0))
+    ++ctx_inc;
+  if ((currMB->mb_up != NULL) && (currMB->mb_up->skip_flag == 0))
+    ++ctx_inc;
+
+  if (currSlice->slice_type == B_SLICE)
+    skip = biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[2][7 + ctx_inc]);
+  else
+    skip = biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][ctx_inc]);
+
+  if (skip)
+    currSlice->last_dquant = 0;
+
+  return skip;
+}
+#endif
+
+/*!
+***************************************************************************
+* \brief
+*    Arithmetically decode the transform size flag bin of a macroblock.
+*    The context is transform_size_contexts + (a + b), where a and b are
+*    the luma_transform_size_8x8_flag values of the left and the upper
+*    neighbour (0 for a missing neighbour).
+*
+* \param currMB  macroblock being decoded
+* \param dep_dp  arithmetic decoding environment
+*
+* \return
+*    the decoded bin
+***************************************************************************
+*/
+
+Boolean readMB_transform_size_flag_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp)
+{
+  TextureInfoContexts *ctx = currMB->p_Slice->tex_ctx;
+  int ctx_inc = 0;
+  int flag;
+
+  if (currMB->mb_up != NULL)
+    ctx_inc += currMB->mb_up->luma_transform_size_8x8_flag;
+  if (currMB->mb_left != NULL)
+    ctx_inc += currMB->mb_left->luma_transform_size_8x8_flag;
+
+  flag = biari_decode_symbol(dep_dp, ctx->transform_size_contexts + ctx_inc);
+
+  return flag;
+}
+
+/*!
+************************************************************************
+* \brief
+* This function is used to arithmetically decode the macroblock
+* type info of a given MB.
+*
+* \param currMB
+* macroblock being decoded; its p_Slice, mb_up and mb_left are read
+* \param dep_dp
+* arithmetic decoding environment passed to every biari_decode_* call
+*
+* \return
+* the decoded macroblock type number:
+* - I slice: 0, 25 (terminating bin set) or 1..24 (16x16 intra)
+* - SI slice: 0, 1, 26 (terminating bin set) or 2..25
+* - other slices: the prefix symbol itself when it is <= 6 (or <= 23
+* in a B slice); otherwise 31 (non-B) / 48 (B) when the terminating
+* bin is set, or the prefix plus the 16x16 intra suffix
+*
+* \note
+* The decode calls consume data through dep_dp, so the order of the
+* calls below is part of the behaviour and must not be changed.
+************************************************************************
+*/
+int readMB_typeInfo_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp)
+{
+ Slice *currSlice = currMB->p_Slice;
+ MotionInfoContexts *ctx = currSlice->mot_ctx;
+
+ int a = 0, b = 0; // context increments from the left (a) and upper (b) neighbour
+ int act_ctx;
+ int act_sym;
+ int bframe=(currSlice->slice_type == B_SLICE);
+ int mode_sym;
+ int curr_mb_type;
+
+ if(currSlice->slice_type == I_SLICE) // INTRA-frame
+ {
+ if (currMB->mb_up != NULL)
+ b = (((currMB->mb_up)->mb_type != I4MB && currMB->mb_up->mb_type != I8MB) ? 1 : 0 );
+
+ if (currMB->mb_left != NULL)
+ a = (((currMB->mb_left)->mb_type != I4MB && currMB->mb_left->mb_type != I8MB) ? 1 : 0 );
+
+ act_ctx = a + b;
+ act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx);
+
+ if (act_sym==0) // 4x4 Intra
+ {
+ curr_mb_type = act_sym;
+ }
+ else // 16x16 Intra
+ {
+ mode_sym = biari_decode_final(dep_dp); // terminating bin: 1 selects type 25
+ if(mode_sym == 1)
+ {
+ curr_mb_type = 25;
+ }
+ else
+ {
+ act_sym = 1; // base value of the 16x16 intra types in an I slice
+ act_ctx = 4;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); // decoding of AC/no AC
+ act_sym += mode_sym*12;
+ act_ctx = 5;
+ // decoding of cbp: 0,1,2
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ if (mode_sym!=0)
+ {
+ act_ctx=6;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ act_sym+=4;
+ if (mode_sym!=0)
+ act_sym+=4;
+ }
+ // decoding of I pred-mode: 0,1,2,3
+ act_ctx = 7;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ act_sym += mode_sym*2;
+ act_ctx = 8;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ act_sym += mode_sym;
+ curr_mb_type = act_sym;
+ }
+ }
+ }
+ else if(currSlice->slice_type == SI_SLICE) // SI-frame
+ {
+ // special ctx's for SI4MB
+ if (currMB->mb_up != NULL)
+ b = (( (currMB->mb_up)->mb_type != SI4MB) ? 1 : 0 );
+
+ if (currMB->mb_left != NULL)
+ a = (( (currMB->mb_left)->mb_type != SI4MB) ? 1 : 0 );
+
+ act_ctx = a + b;
+ act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx);
+
+ if (act_sym==0) // SI 4x4 Intra
+ {
+ curr_mb_type = 0;
+ }
+ else // analog INTRA_IMG
+ {
+ // a and b are recomputed against I4MB only (I8MB is not tested here, unlike the I slice path)
+ if (currMB->mb_up != NULL)
+ b = (( (currMB->mb_up)->mb_type != I4MB) ? 1 : 0 );
+
+ if (currMB->mb_left != NULL)
+ a = (( (currMB->mb_left)->mb_type != I4MB) ? 1 : 0 );
+
+ act_ctx = a + b;
+ act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx);
+
+ if (act_sym==0) // 4x4 Intra
+ {
+ curr_mb_type = 1;
+ }
+ else // 16x16 Intra
+ {
+ mode_sym = biari_decode_final(dep_dp); // terminating bin: 1 selects type 26
+ if( mode_sym==1 )
+ {
+ curr_mb_type = 26;
+ }
+ else
+ {
+ act_sym = 2; // base value is one higher than in an I slice
+ act_ctx = 4;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); // decoding of AC/no AC
+ act_sym += mode_sym*12;
+ act_ctx = 5;
+ // decoding of cbp: 0,1,2
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ if (mode_sym!=0)
+ {
+ act_ctx=6;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ act_sym+=4;
+ if (mode_sym!=0)
+ act_sym+=4;
+ }
+ // decoding of I pred-mode: 0,1,2,3
+ act_ctx = 7;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ act_sym += mode_sym*2;
+ act_ctx = 8;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx );
+ act_sym += mode_sym;
+ curr_mb_type = act_sym;
+ }
+ }
+ }
+ }
+ else
+ {
+ if (bframe)
+ {
+ if (currMB->mb_up != NULL)
+ b = (( (currMB->mb_up)->mb_type != 0) ? 1 : 0 );
+
+ if (currMB->mb_left != NULL)
+ a = (( (currMB->mb_left)->mb_type != 0) ? 1 : 0 );
+
+ act_ctx = a + b;
+
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][act_ctx]))
+ {
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][4]))
+ {
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][5]))
+ {
+ act_sym=12;
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=8;
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=4;
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=2;
+
+ // 24 and 26 are complete symbols (remapped to 11 and 22); all others read one more bin
+ if (act_sym==24) act_sym=11;
+ else if (act_sym==26) act_sym=22;
+ else
+ {
+ if (act_sym==22) act_sym=23;
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=1;
+ }
+ }
+ else
+ {
+ act_sym=3;
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=4;
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=2;
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=1;
+ }
+ }
+ else
+ {
+ if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym=2;
+ else act_sym=1;
+ }
+ }
+ else
+ {
+ act_sym = 0;
+ }
+ }
+ else // P-frame
+ {
+ {
+ if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][4] ))
+ {
+ if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][7] )) act_sym = 7;
+ else act_sym = 6;
+ }
+ else
+ {
+ if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][5] ))
+ {
+ if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][7] )) act_sym = 2;
+ else act_sym = 3;
+ }
+ else
+ {
+ if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][6] )) act_sym = 4;
+ else act_sym = 1;
+ }
+ }
+ }
+ }
+
+ // prefix symbols up to 6 (up to 23 in a B slice) are final; larger ones carry a 16x16 intra suffix
+ if (act_sym<=6 || (((currSlice->slice_type == B_SLICE) ? 1 : 0) && act_sym<=23))
+ {
+ curr_mb_type = act_sym;
+ }
+ else // additional info for 16x16 Intra-mode
+ {
+ mode_sym = biari_decode_final(dep_dp);
+ if( mode_sym==1 )
+ {
+ if(bframe) // B frame
+ curr_mb_type = 48;
+ else // P frame
+ curr_mb_type = 31;
+ }
+ else
+ {
+ act_ctx = 8;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); // decoding of AC/no AC
+ act_sym += mode_sym*12;
+
+ // decoding of cbp: 0,1,2
+ act_ctx = 9;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx );
+ if (mode_sym != 0)
+ {
+ act_sym+=4;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); // same context (9) for both cbp bins
+ if (mode_sym != 0)
+ act_sym+=4;
+ }
+
+ // decoding of I pred-mode: 0,1,2,3
+ act_ctx = 10;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx );
+ act_sym += mode_sym*2;
+ mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); // same context (10) for both pred-mode bins
+ act_sym += mode_sym;
+ curr_mb_type = act_sym;
+ }
+ }
+ }
+ return curr_mb_type;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decode one intra prediction mode.
+*    The first bin (context ipr_contexts) set to 1 means "use the most
+*    probable mode"; otherwise three further bins, all decoded with
+*    context ipr_contexts + 1, form the remaining mode, least
+*    significant bit first.
+*
+* \param currSlice  slice providing the texture contexts
+* \param dep_dp     arithmetic decoding environment
+*
+* \return
+*    -1 when the first bin is 1, otherwise the 3-bit mode value (0..7)
+*
+* \note
+*    Only compiled when _DEBUG is defined or _M_IX86 is not defined.
+************************************************************************
+*/
+#if defined(_DEBUG) || !defined(_M_IX86)
+int readIntraPredMode_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp)
+{
+  TextureInfoContexts *ctx = currSlice->tex_ctx;
+  int mode = 0;
+  int bit;
+
+  // use_most_probable_mode
+  if (biari_decode_symbol(dep_dp, ctx->ipr_contexts) == 1)
+    return -1;
+
+  // remaining_mode_selector, LSB first
+  for (bit = 0; bit < 3; ++bit)
+    mode |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts + 1) << bit);
+
+  return mode;
+}
+#endif
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decode a reference index for the block at (x, y).
+*    The context of the first bin is built from the upper neighbour
+*    (weight 2) and the left neighbour (weight 1). A neighbour only
+*    contributes when it is available, is not IPCM, is not IS_DIRECT,
+*    does not have b8mode == 0 together with b8pdir == 2 for the
+*    8x8 block in question, and its stored ref_idx exceeds 0 (or 1 for
+*    a field neighbour of a frame macroblock when mb_aff_frame_flag is
+*    set).
+*
+* \param currMB  macroblock being decoded
+* \param dep_dp  arithmetic decoding environment
+* \param list    index into dec_picture->motion.motion[]
+* \param x, y    position handed to the neighbour lookups
+*
+* \return
+*    0 when the first bin is 0, otherwise unary_bin_decode() + 1
+************************************************************************
+*/
+char readRefFrame_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int x, int y)
+{
+  // NOTE(review): keep this local named currSlice; the IS_DIRECT macro may refer to it - confirm
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  MotionInfoContexts *ctx = currSlice->mot_ctx;
+  PicMotion **ref_rows = p_Vid->dec_picture->motion.motion[list];
+  const int frame_mb_in_mbaff = (currSlice->mb_aff_frame_flag && (currMB->mb_field == FALSE));
+  Macroblock *nb = NULL;
+  PixelPos up, left;
+  int ctx_inc = 0;
+  int ref;
+
+  p_Vid->getNeighbourPXLuma(currMB, x, y - 1, &up);
+  // TODO: this gets called with x << 2 and y << 2, so we can undo the internal >> 2 easily by just passing x and y
+  if (up.available)
+  {
+    const int b8 = ((up.x >> 3) & 0x01) + ((up.y >> 2) & 0x02);
+
+    nb = &p_Vid->mb_data[up.mb_addr];
+    if ((nb->mb_type != IPCM) && !IS_DIRECT(nb) && !((nb->b8mode[b8] == 0) && (nb->b8pdir[b8] == 2)))
+    {
+      const int limit = (frame_mb_in_mbaff && (nb->mb_field == TRUE)) ? 1 : 0;
+
+      if (ref_rows[up.pos_y >> 2][up.pos_x >> 2].ref_idx > limit)
+        ctx_inc += 2;
+    }
+  }
+
+  p_Vid->getNeighbourXPLuma(currMB, x - 1, y, &left);
+  if (left.available)
+  {
+    const int b8 = ((left.x >> 3) & 0x01) + ((left.y >> 2) & 0x02);
+
+    nb = &p_Vid->mb_data[left.mb_addr];
+    if ((nb->mb_type != IPCM) && !IS_DIRECT(nb) && !((nb->b8mode[b8] == 0) && (nb->b8pdir[b8] == 2)))
+    {
+      const int limit = (frame_mb_in_mbaff && (nb->mb_field == 1)) ? 1 : 0;
+
+      if (ref_rows[left.pos_y >> 2][left.pos_x >> 2].ref_idx > limit)
+        ctx_inc += 1;
+    }
+  }
+
+  ref = biari_decode_symbol(dep_dp, ctx->ref_no_contexts[0] + ctx_inc);
+  if (ref != 0)
+  {
+    // remaining bins start at context 4
+    ref = unary_bin_decode(dep_dp, ctx->ref_no_contexts[0] + 4, 1);
+    ++ref;
+  }
+
+  return ref;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Variant of readRefFrame_CABAC() for x == 0: the neighbours are
+*    looked up with getNeighbour0XLuma() (upper) and
+*    getNeighbourNXLuma() (left), and the 8x8 block index uses column 0
+*    for the upper and column 1 for the left neighbour.
+*
+* \param currMB  macroblock being decoded
+* \param dep_dp  arithmetic decoding environment
+* \param list    index into dec_picture->motion.motion[]
+* \param y       position handed to the neighbour lookups
+*
+* \return
+*    0 when the first bin is 0, otherwise unary_bin_decode() + 1
+************************************************************************
+*/
+char readRefFrame_CABAC0(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int list, int y)
+{
+  // NOTE(review): keep this local named currSlice; the IS_DIRECT macro may refer to it - confirm
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  MotionInfoContexts *ctx = currSlice->mot_ctx;
+  PicMotion **ref_rows = p_Vid->dec_picture->motion.motion[list];
+  const int frame_mb_in_mbaff = (currSlice->mb_aff_frame_flag && (currMB->mb_field == FALSE));
+  Macroblock *nb = NULL;
+  PixelPos up, left;
+  int ctx_inc = 0;
+  int ref;
+
+  p_Vid->getNeighbour0XLuma(currMB, y - 1, &up);
+  // TODO: this gets called with x << 2 and y << 2, so we can undo the internal >> 2 easily by just passing x and y
+  if (up.available)
+  {
+    // column bit is 0 because x == 0
+    const int b8 = 0 + ((up.y >> 2) & 0x02);
+
+    nb = &p_Vid->mb_data[up.mb_addr];
+    if ((nb->mb_type != IPCM) && !IS_DIRECT(nb) && !((nb->b8mode[b8] == 0) && (nb->b8pdir[b8] == 2)))
+    {
+      const int limit = (frame_mb_in_mbaff && (nb->mb_field == TRUE)) ? 1 : 0;
+
+      if (ref_rows[up.pos_y >> 2][up.pos_x >> 2].ref_idx > limit)
+        ctx_inc += 2;
+    }
+  }
+
+  p_Vid->getNeighbourNXLuma(currMB, y, &left);
+  if (left.available)
+  {
+    // column bit is fixed to that of x position 15
+    const int b8 = ((15 >> 3) & 0x01) + ((left.y >> 2) & 0x02);
+
+    nb = &p_Vid->mb_data[left.mb_addr];
+    if ((nb->mb_type != IPCM) && !IS_DIRECT(nb) && !((nb->b8mode[b8] == 0) && (nb->b8pdir[b8] == 2)))
+    {
+      const int limit = (frame_mb_in_mbaff && (nb->mb_field == 1)) ? 1 : 0;
+
+      if (ref_rows[left.pos_y >> 2][left.pos_x >> 2].ref_idx > limit)
+        ctx_inc += 1;
+    }
+  }
+
+  ref = biari_decode_symbol(dep_dp, ctx->ref_no_contexts[0] + ctx_inc);
+  if (ref != 0)
+  {
+    // remaining bins start at context 4
+    ref = unary_bin_decode(dep_dp, ctx->ref_no_contexts[0] + 4, 1);
+    ++ref;
+  }
+
+  return ref;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decode the delta qp of a macroblock.
+*    The first bin uses context delta_qp_contexts + 1 when the slice's
+*    last_dquant is non-zero and delta_qp_contexts + 0 otherwise. A
+*    non-zero first bin is followed by unary_bin_decode() starting at
+*    context 2. The resulting code number n maps to (n + 1) >> 1, which
+*    is negated when n is even.
+*
+* \param currSlice  slice providing the contexts; last_dquant is read
+*                   and then overwritten with the decoded value
+* \param dep_dp     arithmetic decoding environment
+*
+* \return
+*    the decoded delta qp
+*
+* \note
+*    Only compiled when _DEBUG is defined or _M_IX86 is not defined.
+************************************************************************
+*/
+#if defined(_DEBUG) || !defined(_M_IX86)
+short readDquant_CABAC(Slice *currSlice, DecodingEnvironmentPtr dep_dp)
+{
+  MotionInfoContexts *ctx = currSlice->mot_ctx;
+  const int first_ctx = (currSlice->last_dquant != 0) ? 1 : 0;
+  int code = biari_decode_symbol(dep_dp, ctx->delta_qp_contexts + first_ctx);
+  short dquant;
+
+  if (code != 0)
+  {
+    code = unary_bin_decode(dep_dp, ctx->delta_qp_contexts + 2, 1);
+    ++code;
+  }
+
+  // odd code numbers are positive, even ones negative
+  dquant = (code + 1) >> 1;
+  if (!(code & 0x01))
+    dquant = -dquant;
+
+  currSlice->last_dquant = dquant;
+  return dquant;
+}
+#endif
+/*!
+************************************************************************
+* \brief
+* This function is used to arithmetically decode the coded
+* block pattern of a given MB.
+*
+* Four luma bins are decoded into bits 0..3 of the result. Each bin
+* uses context cbp_contexts[0] + a + b, where a (1) and b (2) are set
+* when the relevant cbp bit of the left / upper neighbour is 0.
+* Neighbours inside the current MB use the bits decoded so far;
+* neighbours in other MBs contribute 0 when missing or IPCM.
+*
+* For chroma formats other than YUV400 and YUV444 a chroma bin is
+* decoded with cbp_contexts[1]; when it is set a second bin
+* (cbp_contexts[2]) selects whether 16 or 32 is added.
+*
+* \param currMB
+* macroblock being decoded
+* \param dep_dp
+* arithmetic decoding environment
+*
+* \return
+* the decoded cbp value; when it is 0, currSlice->last_dquant is
+* reset to 0 as a side effect
+************************************************************************
+*/
+int readCBP_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ Slice *currSlice = currMB->p_Slice;
+ TextureInfoContexts *ctx = currSlice->tex_ctx;
+ Macroblock *neighborMB = NULL;
+
+ int a, b;
+ int curr_cbp_ctx;
+ int cbp = 0;
+ int cbp_bit;
+ PixelPos block_a;
+
+ // coding of luma part (bit by bit)
+ neighborMB = currMB->mb_up;
+ b = 0;
+
+ // luma bin 0: upper neighbour is bit 2 of the MB above, left neighbour comes from getNeighbourLeftLuma()
+ if (neighborMB != NULL)
+ {
+ if(neighborMB->mb_type!=IPCM)
+ b = (( (neighborMB->cbp & 4) == 0) ? 2 : 0);
+ }
+
+ p_Vid->getNeighbourLeftLuma(currMB, &block_a);
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ a = 0;
+ else
+ a = (( (p_Vid->mb_data[block_a.mb_addr].cbp & (1<<(2*(block_a.y>>3)+1))) == 0) ? 1 : 0);
+ }
+ else
+ a=0;
+
+ curr_cbp_ctx = a + b;
+ cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx );
+ //if (cbp_bit)
+ cbp += cbp_bit;//1;
+
+ // luma bin 1: b is deliberately NOT reset here; if the MB above is missing or IPCM, b is still 0 from bin 0
+ if (neighborMB != NULL)
+ {
+ if(neighborMB->mb_type!=IPCM)
+ b = (( (neighborMB->cbp & 8) == 0) ? 2 : 0);
+ }
+
+ a = ( ((cbp & 1) == 0) ? 1: 0); // left neighbour is bin 0 of this MB
+
+ curr_cbp_ctx = a + b;
+
+ cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx );
+ //if (cbp_bit)
+ cbp += (cbp_bit << 1); //2;
+
+ b = ( ((cbp & 1) == 0) ? 2: 0); // luma bin 2: upper neighbour is bin 0 of this MB
+
+ p_Vid->getNeighbourNPLumaNB(currMB, 8, &block_a);
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ a = 0;
+ else
+ a = (( (p_Vid->mb_data[block_a.mb_addr].cbp & (1<<(2*(block_a.y>>3)+1))) == 0) ? 1 : 0);
+ }
+ else
+ a=0;
+
+ curr_cbp_ctx = a + b;
+ cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx );
+ //if (cbp_bit)
+ cbp += (cbp_bit << 2); //4;
+
+ // luma bin 3: both neighbours are bins of this MB
+ b = ( ((cbp & 2) == 0) ? 2: 0);
+ a = ( ((cbp & 4) == 0) ? 1: 0);
+
+ curr_cbp_ctx = a + b;
+ cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[0] + curr_cbp_ctx );
+ //if (cbp_bit)
+ cbp += (cbp_bit << 3); //8;
+
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444))
+ {
+ // coding of chroma part
+ // CABAC decoding for BinIdx 0
+ b = 0;
+ neighborMB = currMB->mb_up;
+ if (neighborMB != NULL)
+ {
+ if (neighborMB->mb_type==IPCM || (neighborMB->cbp > 15)) // neighbour has any chroma bits set (or is IPCM)
+ b = 2;
+ }
+
+ a = 0;
+ neighborMB = currMB->mb_left;
+ if (neighborMB != NULL)
+ {
+ if (neighborMB->mb_type==IPCM || (neighborMB->cbp > 15))
+ a = 1;
+ }
+
+ curr_cbp_ctx = a + b;
+ cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[1] + curr_cbp_ctx );
+
+ // CABAC decoding for BinIdx 1
+ if (cbp_bit) // set the chroma bits
+ {
+ b = 0;
+ neighborMB = currMB->mb_up;
+ if (neighborMB != NULL)
+ {
+ //if ((neighborMB->mb_type == IPCM) || ((neighborMB->cbp > 15) && ((neighborMB->cbp >> 4) == 2)))
+ if ((neighborMB->mb_type == IPCM) || ((neighborMB->cbp >> 4) == 2))
+ b = 2;
+ }
+
+
+ a = 0;
+ neighborMB = currMB->mb_left;
+ if (neighborMB != NULL)
+ {
+ if ((neighborMB->mb_type == IPCM) || ((neighborMB->cbp >> 4) == 2))
+ a = 1;
+ }
+
+ curr_cbp_ctx = a + b;
+ cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[2] + curr_cbp_ctx );
+ cbp += (16 << cbp_bit); // ? 32 : 16;
+ }
+ }
+
+
+ if (!cbp)
+ {
+ currSlice->last_dquant = 0;
+ }
+
+ return cbp;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Arithmetically decode the chroma intra prediction mode of a
+*    macroblock. The context of the first bin is incremented once for
+*    each of the upper and left neighbours that exists, has a non-zero
+*    c_ipred_mode and is not IPCM. A non-zero first bin is followed by
+*    unary_bin_max_decode() on context cipr_contexts + 3.
+*
+* \param currMB  macroblock being decoded
+* \param dep_dp  arithmetic decoding environment
+*
+* \return
+*    0 when the first bin is 0, otherwise unary_bin_max_decode() + 1
+************************************************************************
+*/
+char readCIPredMode_CABAC(Macroblock *currMB,
+                          DecodingEnvironmentPtr dep_dp)
+{
+  TextureInfoContexts *ctx = currMB->p_Slice->tex_ctx;
+  Macroblock *up = currMB->mb_up;
+  Macroblock *left = currMB->mb_left;
+  int ctx_inc = 0;
+  int mode;
+
+  if ((up != NULL) && (up->c_ipred_mode != 0) && (up->mb_type != IPCM))
+    ++ctx_inc;
+  if ((left != NULL) && (left->c_ipred_mode != 0) && (left->mb_type != IPCM))
+    ++ctx_inc;
+
+  mode = biari_decode_symbol(dep_dp, ctx->cipr_contexts + ctx_inc);
+  if (mode != 0)
+    mode = unary_bin_max_decode(dep_dp, ctx->cipr_contexts + 3, 0, 1) + 1;
+
+  return mode;
+}
+
+static const byte maxpos [] = {15, 14, 63, 31, 31, 15, 3, 14, 7, 15, 15, 14, 63, 31, 31, 15, 15, 14, 63, 31, 31, 15}; // 22 entries; presumably indexed by block type like type2ctx_bcbp - TODO confirm
+static const byte c1isdc [] = { 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}; // 22 entries of 0/1 flags; not referenced in the visible code
+static const byte type2ctx_bcbp[] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5, 10, 11, 12, 13, 13, 14, 16, 17, 18, 19, 19, 20}; // block type -> row of tex_ctx->bcbp_contexts used when decoding the coded block bit
+static const byte type2ctx_map [] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; // 8 (NOTE(review): meaning of the trailing number is not evident here)
+static const byte type2ctx_last[] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; // 8 (same values as type2ctx_map)
+static const byte type2ctx_one [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5, 10, 11, 12, 13, 13, 14, 16, 17, 18, 19, 19, 20}; // 7 (same values as type2ctx_bcbp)
+static const byte type2ctx_abs [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5, 10, 11, 12, 13, 13, 14, 16, 17, 18, 19, 19, 20}; // 7 (same values as type2ctx_bcbp)
+static const byte max_c2 [] = { 4, 4, 4, 4, 4, 4, 3, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; // 9
+
+
+
+/*!
+************************************************************************
+* \brief
+* Read CBP4-BIT
+*
+* Decodes the coded block bit for the block of the given type at
+* currMB->subblock_x / subblock_y and records it in currMB->cbp_bits[]
+* (and cbp_bits_8x8[] for the 8x8 types).
+*
+* The context is 2 * upper_bit + left_bit, added to
+* bcbp_contexts[type2ctx_bcbp[type]]. A neighbour bit is 1 for an IPCM
+* neighbour, is read from the neighbour's stored bits when available,
+* and otherwise defaults to 1 for intra blocks and 0 for others.
+*
+* Three cases are distinguished:
+* - chroma_format_idc != YUV444: neighbour bits come from cbp_bits[0]
+* - YUV444 with IS_INDEPENDENT(p_Vid): same code as the first case
+* - YUV444 otherwise: the plane (0, 1 or 2) and, for the 8x8 types,
+* cbp_bits_8x8[] are selected from the block type
+* In the first two cases no bin is decoded for LUMA_8x8 and the bit
+* keeps its initial value of 1.
+*
+* \param currMB
+* macroblock being decoded (is_v_block, is_intra_block and the
+* sub-block position are read, the cbp bit fields are updated)
+* \param dep_dp
+* arithmetic decoding environment
+* \param type
+* block type; also used as index into type2ctx_bcbp[]
+*
+* \return
+* the coded block bit (0 or 1)
+************************************************************************
+*/
+static int read_and_store_CBP_block_bit_444(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type)
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ TextureInfoContexts *tex_ctx = currSlice->tex_ctx;
+
+ // classify the block type; exactly which bit range is used below depends on these flags
+ int y_ac = (type==LUMA_16AC || type==LUMA_8x8 || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4
+ || type==CB_16AC || type==CB_8x8 || type==CB_8x4 || type==CB_4x8 || type==CB_4x4
+ || type==CR_16AC || type==CR_8x8 || type==CR_8x4 || type==CR_4x8 || type==CR_4x4);
+ int y_dc = (type==LUMA_16DC || type==CB_16DC || type==CR_16DC);
+ int u_ac = (type==CHROMA_AC && !currMB->is_v_block);
+ int v_ac = (type==CHROMA_AC && currMB->is_v_block);
+ int chroma_dc = (type==CHROMA_DC || type==CHROMA_DC_2x4 || type==CHROMA_DC_4x4);
+ int u_dc = (chroma_dc && !currMB->is_v_block);
+ int v_dc = (chroma_dc && currMB->is_v_block);
+ int j = (y_ac || u_ac || v_ac ? currMB->subblock_y : 0);
+ int i = (y_ac || u_ac || v_ac ? currMB->subblock_x : 0);
+ int bit = (y_dc ? 0 : y_ac ? 1 : u_dc ? 17 : v_dc ? 18 : u_ac ? 19 : 35); // base bit of the group: 0 DC, 1.. AC, 17/18 chroma DC, 19.. / 35.. chroma AC
+ int default_bit = (currMB->is_intra_block ? 1 : 0);
+ int upper_bit = default_bit;
+ int left_bit = default_bit;
+ int cbp_bit = 1; // always one for 8x8 mode
+ int ctx;
+ int bit_pos_a = 0;
+ int bit_pos_b = 0;
+
+ PixelPos block_a, block_b;
+ if (y_ac)
+ {
+ get4x4NeighbourLuma(currMB, i - 1, j , &block_a);
+ get4x4NeighbourLuma(currMB, i , j - 1, &block_b);
+ if (block_a.available)
+ bit_pos_a = 4*block_a.y + block_a.x;
+ if (block_b.available)
+ bit_pos_b = 4*block_b.y + block_b.x;
+ }
+ else if (y_dc)
+ {
+ get4x4NeighbourLuma(currMB, i - 1, j , &block_a);
+ get4x4NeighbourLuma(currMB, i , j - 1, &block_b);
+ }
+ else if (u_ac||v_ac)
+ {
+ get4x4Neighbour(currMB, i - 1, j , p_Vid->mb_size[IS_CHROMA], &block_a);
+ get4x4Neighbour(currMB, i , j - 1, p_Vid->mb_size[IS_CHROMA], &block_b);
+ if (block_a.available)
+ bit_pos_a = 4*block_a.y + block_a.x;
+ if (block_b.available)
+ bit_pos_b = 4*block_b.y + block_b.x;
+ }
+ else
+ {
+ get4x4Neighbour(currMB, i - 1, j , p_Vid->mb_size[IS_CHROMA], &block_a);
+ get4x4Neighbour(currMB, i , j - 1, p_Vid->mb_size[IS_CHROMA], &block_b);
+ }
+
+ if (dec_picture->chroma_format_idc!=YUV444)
+ {
+ if (type!=LUMA_8x8)
+ {
+ //--- get bits from neighboring blocks ---
+ if (block_b.available)
+ {
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit=1;
+ else
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], bit + bit_pos_b);
+ }
+
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit=1;
+ else
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit + bit_pos_a);
+ }
+
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx);
+ }
+ }
+ else if( IS_INDEPENDENT(p_Vid) )
+ {
+ if (type!=LUMA_8x8)
+ {
+ //--- get bits from neighbouring blocks ---
+ if (block_b.available)
+ {
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit = 1;
+ else
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0],bit+bit_pos_b);
+ }
+
+
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit = 1;
+ else
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit+bit_pos_a);
+ }
+
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx);
+ }
+ }
+ else {
+ // YUV444, not independent: a bin is decoded for every type, including LUMA_8x8
+ if (block_b.available)
+ {
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit=1;
+ else
+ {
+ if(type==LUMA_8x8)
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits_8x8[0], bit + bit_pos_b);
+ else if (type==CB_8x8)
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits_8x8[1], bit + bit_pos_b);
+ else if (type==CR_8x8)
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits_8x8[2], bit + bit_pos_b);
+ else if ((type==CB_4x4)||(type==CB_4x8)||(type==CB_8x4)||(type==CB_16AC)||(type==CB_16DC))
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[1],bit+bit_pos_b);
+ else if ((type==CR_4x4)||(type==CR_4x8)||(type==CR_8x4)||(type==CR_16AC)||(type==CR_16DC))
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[2],bit+bit_pos_b);
+ else
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0],bit+bit_pos_b);
+ }
+ }
+
+
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit=1;
+ else
+ {
+ if(type==LUMA_8x8)
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits_8x8[0],bit+bit_pos_a);
+ else if (type==CB_8x8)
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits_8x8[1],bit+bit_pos_a);
+ else if (type==CR_8x8)
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits_8x8[2],bit+bit_pos_a);
+ else if ((type==CB_4x4)||(type==CB_4x8)||(type==CB_8x4)||(type==CB_16AC)||(type==CB_16DC))
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[1],bit+bit_pos_a);
+ else if ((type==CR_4x4)||(type==CR_4x8)||(type==CR_8x4)||(type==CR_16AC)||(type==CR_16DC))
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[2],bit+bit_pos_a);
+ else
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit+bit_pos_a);
+ }
+ }
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx);
+ }
+
+ //--- set bits for current block ---
+ bit = (y_dc ? 0 : y_ac ? 1 + j + (i >> 2) : u_dc ? 17 : v_dc ? 18 : u_ac ? 19 + j + (i >> 2) : 35 + j + (i >> 2));
+
+ if (cbp_bit)
+ {
+ // mask per block shape: 0x33 = 2x2 sub-blocks, 0x03 = two side by side, 0x11 = two stacked (rows are 4 bits apart), 0x01 = one
+ if (type==LUMA_8x8)
+ {
+ currMB->cbp_bits[0] |= ((int64) 0x33 << bit );
+
+ if (dec_picture->chroma_format_idc==YUV444)
+ {
+ currMB->cbp_bits_8x8[0] |= ((int64) 0x33 << bit );
+ }
+ }
+ else if (type==CB_8x8)
+ {
+ currMB->cbp_bits_8x8[1] |= ((int64) 0x33 << bit );
+ currMB->cbp_bits[1] |= ((int64) 0x33 << bit );
+ }
+ else if (type==CR_8x8)
+ {
+ currMB->cbp_bits_8x8[2] |= ((int64) 0x33 << bit );
+ currMB->cbp_bits[2] |= ((int64) 0x33 << bit );
+ }
+ else if (type==LUMA_8x4)
+ {
+ currMB->cbp_bits[0] |= ((int64) 0x03 << bit );
+ }
+ else if (type==CB_8x4)
+ {
+ currMB->cbp_bits[1] |= ((int64) 0x03 << bit );
+ }
+ else if (type==CR_8x4)
+ {
+ currMB->cbp_bits[2] |= ((int64) 0x03 << bit );
+ }
+ else if (type==LUMA_4x8)
+ {
+ currMB->cbp_bits[0] |= ((int64) 0x11<< bit );
+ }
+ else if (type==CB_4x8)
+ {
+ currMB->cbp_bits[1] |= ((int64)0x11<< bit );
+ }
+ else if (type==CR_4x8)
+ {
+ currMB->cbp_bits[2] |= ((int64)0x11<< bit );
+ }
+ else if ((type==CB_4x4)||(type==CB_16AC)||(type==CB_16DC))
+ {
+ currMB->cbp_bits[1] |= ((int64)0x01<<bit);
+ }
+ else if ((type==CR_4x4)||(type==CR_16AC)||(type==CR_16DC))
+ {
+ currMB->cbp_bits[2] |= ((int64)0x01<<bit);
+ }
+ else
+ {
+ currMB->cbp_bits[0] |= ((int64)0x01<<bit);
+ }
+ }
+ return cbp_bit;
+}
+
+
+
+/*!
+************************************************************************
+* \brief
+* Read CBP4-BIT (coded-block flag) of one transform block. This variant
+* is installed by set_read_and_store_CBP() for every chroma format
+* except YUV444.
+*
+* For LUMA_8x8 no symbol is decoded: the flag stays 1 and the mask 0x33
+* is passed to or_bits() for cbp_bits[0]. For every other type one
+* symbol is decoded with biari_decode_symbol(); the context increment is
+* 2 * upper_bit + left_bit, where both bits are read from cbp_bits[0] of
+* the neighbouring macroblocks. A neighbour of type IPCM counts as 1; an
+* unavailable neighbour keeps the default value of the branch.
+*
+* Bit positions in cbp_bits[0] used by this function:
+*   0                  LUMA_16DC
+*   1 + j + (i >> 2)   other luma types (j = subblock_y, i = subblock_x)
+*   17 / 18            branch "type <= CHROMA_DC_4x4" (is_v_block 0 / 1)
+*   19 / 35 (base)     CHROMA_AC (is_v_block 0 / 1)
+*   35                 all remaining types (final else branch)
+*
+* \param currMB
+*    current macroblock; its cbp_bits[0] is updated when the flag is set
+* \param dep_dp
+*    arithmetic decoding environment the symbol is read from
+* \param type
+*    block type (LUMA_16DC, LUMA_8x8, CHROMA_AC, ...)
+* \return
+*    the coded-block flag (always 1 for LUMA_8x8)
+************************************************************************
+*/
+ static int read_and_store_CBP_block_bit_normal(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type)
+ {
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ TextureInfoContexts *tex_ctx = currSlice->tex_ctx;
+ int cbp_bit = 1; // always one for 8x8 mode
+
+ if (type==LUMA_16DC)
+ {
+
+ int upper_bit = 1; // value kept when the upper neighbour is unavailable
+ int left_bit = 1; // value kept when the left neighbour is unavailable
+ int ctx;
+
+ PixelPos block_a, block_b;
+
+ //--- get bits from neighboring blocks ---
+ p_Vid->getNeighbour0X(currMB, -1, p_Vid->mb_size[IS_LUMA], &block_b);
+ if (block_b.available)
+ {
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit=1;
+ else
+ upper_bit = (int)p_Vid->mb_data[block_b.mb_addr].cbp_bits[0]&1;
+ }
+
+ p_Vid->getNeighbourX0(currMB, -1, p_Vid->mb_size[IS_LUMA], &block_a);
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit=1;
+ else
+ left_bit = (int)p_Vid->mb_data[block_a.mb_addr].cbp_bits[0]&1;
+ }
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[LUMA_16DC]] + ctx);
+
+ //--- set bits for current block ---
+
+ if (cbp_bit)
+ {
+ currMB->cbp_bits[0] |= 0x01;
+ }
+ }
+ else if (type == LUMA_8x8)
+ {
+ int j = currMB->subblock_y;
+ int i = currMB->subblock_x;
+
+ //--- set bits for current block (no symbol is decoded in this branch) ---
+ int bit = 1 + j + (i >> 2);
+
+ or_bits(&currMB->cbp_bits[0], 0x33, bit);
+ }
+ else if (type <= LUMA_4x4) // type==LUMA_16AC || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4)
+ {
+ int j = currMB->subblock_y;
+ int i = currMB->subblock_x;
+ int bit;
+ int default_bit = (currMB->is_intra_block ? 1 : 0); // used for unavailable neighbours
+ int upper_bit = default_bit;
+ int left_bit = default_bit;
+ int ctx;
+
+ //--- get bits from neighboring blocks ---
+ PixelPos block_a, block_b;
+ p_Vid->getNeighbourPXLumaNB_NoPos(currMB, j-1, &block_b);
+ if (block_b.available)
+ {
+ int bit_pos_b = (block_b.y&((short)~3)) + (i>>2);
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit=1;
+ else
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], 1 + bit_pos_b);
+ }
+
+ p_Vid->getNeighbourXPLumaNB_NoPos(currMB, i-1, j, &block_a);
+ if (block_a.available)
+ {
+ int bit_pos_a = (block_a.y&((short)~3)) + (block_a.x>>2);
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit=1;
+ else
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],1 + bit_pos_a);
+ }
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx);
+
+
+ //--- set bits for current block ---
+ bit = 1 + j + (i >> 2);
+
+ if (cbp_bit)
+ {
+ if (type==LUMA_8x4)
+ {
+ or_bits_low(&currMB->cbp_bits[0], 0x03, bit);
+ }
+ else if (type==LUMA_4x8)
+ {
+ or_bits_low(&currMB->cbp_bits[0], 0x011, bit);
+ }
+ else
+ {
+ or_bits_low(&currMB->cbp_bits[0], 0x01, bit);
+ }
+ }
+ }
+ else if (type == CHROMA_AC)
+ {
+ int u_ac = !currMB->is_v_block;
+
+ int default_bit = (currMB->is_intra_block ? 1 : 0); // used for unavailable neighbours
+ int upper_bit = default_bit;
+ int left_bit = default_bit;
+ int ctx;
+
+ PixelPos block_a, block_b;
+
+ int j = currMB->subblock_y;
+ int i = currMB->subblock_x;
+ int bit = (u_ac ? 19 : 35); // base bit position of this chroma plane
+
+ p_Vid->getNeighbourXP_NoPos(currMB, i - 1, j , p_Vid->mb_size[IS_CHROMA], &block_a);
+ p_Vid->getNeighbourPX_NoPos(currMB, i , j - 1, p_Vid->mb_size[IS_CHROMA], &block_b);
+
+ //--- get bits from neighboring blocks ---
+ if (block_b.available)
+ {
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit=1;
+ else
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], bit + (block_b.y&((short)~3)) + (block_b.x>>2));
+ }
+
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit=1;
+ else
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit + (block_a.y&((short)~3)) + (block_a.x>>2));
+ }
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[CHROMA_AC]] + ctx);
+
+
+ //--- set bits for current block ---
+ if (cbp_bit)
+ {
+ or_bits(&currMB->cbp_bits[0], 0x01, bit + j + (i >> 2));
+ }
+
+ }
+ else if (type <= CHROMA_DC_4x4)
+ {
+ int v_dc = currMB->is_v_block;
+ int default_bit = (currMB->is_intra_block ? 1 : 0); // used for unavailable neighbours
+ int upper_bit = default_bit;
+ int left_bit = default_bit;
+ int ctx;
+
+
+ PixelPos block_a, block_b;
+
+ int bit = (v_dc ? 18 : 17);
+ p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[IS_CHROMA], &block_a);
+ p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &block_b);
+ //--- get bits from neighboring blocks ---
+ if (block_b.available)
+ {
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit=1;
+ else
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], bit);
+ }
+
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit=1;
+ else
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],bit);
+ }
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx);
+
+
+ //--- set bits for current block ---
+ if (cbp_bit)
+ {
+ or_bits(&currMB->cbp_bits[0], 0x01, bit);
+ }
+
+
+ }
+ else
+ {
+ int default_bit = (currMB->is_intra_block ? 1 : 0); // used for unavailable neighbours
+ int upper_bit = default_bit;
+ int left_bit = default_bit;
+ int ctx;
+
+
+ PixelPos block_a, block_b;
+
+ p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[IS_CHROMA], &block_a);
+ p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &block_b);
+ //--- get bits from neighboring blocks ---
+ if (block_b.available)
+ {
+ if(p_Vid->mb_data[block_b.mb_addr].mb_type==IPCM)
+ upper_bit=1;
+ else
+ upper_bit = get_bit(p_Vid->mb_data[block_b.mb_addr].cbp_bits[0], 35);
+ }
+
+ if (block_a.available)
+ {
+ if(p_Vid->mb_data[block_a.mb_addr].mb_type==IPCM)
+ left_bit=1;
+ else
+ left_bit = get_bit(p_Vid->mb_data[block_a.mb_addr].cbp_bits[0],35);
+ }
+
+ ctx = 2 * upper_bit + left_bit;
+ //===== decode symbol =====
+ cbp_bit = biari_decode_symbol (dep_dp, tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx);
+
+
+ //--- set bits for current block ---
+ if (cbp_bit)
+ {
+ or_bits(&currMB->cbp_bits[0], 0x01, 35);
+ }
+
+
+ }
+ return cbp_bit;
+}
+
+
+/*!
+ * Install the coded-block-flag reader of a macroblock: the 4:4:4 variant
+ * when chroma_format_idc is YUV444, the normal variant otherwise.
+ */
+void set_read_and_store_CBP(Macroblock **currMB, int chroma_format_idc)
+{
+  Macroblock *mb = *currMB;
+
+  mb->read_and_store_CBP_block_bit = (chroma_format_idc == YUV444)
+                                   ? read_and_store_CBP_block_bit_444
+                                   : read_and_store_CBP_block_bit_normal;
+}
+
+
+
+
+
+//===== position -> ctx for MAP (significance flag context per scan position) =====
+//--- zig-zag scan ---- (picked by read_significance_map when the block is not field coded)
+static const byte pos2ctx_map8x8 [] = { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
+4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9, 10, 9, 8, 7,
+7, 6, 11, 12, 13, 11, 6, 7, 8, 9, 14, 10, 9, 8, 6, 11,
+12, 13, 11, 6, 9, 14, 10, 9, 11, 12, 13, 11 ,14, 10, 12, 14}; // 15 CTX
+static const byte pos2ctx_map8x4 [] = { 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 9, 8, 6, 7, 8,
+9, 10, 11, 9, 8, 6, 12, 8, 9, 10, 11, 9, 13, 13, 14, 14}; // 15 CTX
+static const byte pos2ctx_map4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14}; // 15 CTX
+static const byte pos2ctx_map2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX (only values 0..2 occur in this table)
+static const byte pos2ctx_map4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX (only values 0..2 occur in this table)
+static const byte* pos2ctx_map [] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8, pos2ctx_map8x4,
+pos2ctx_map8x4, pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4,
+pos2ctx_map2x4c, pos2ctx_map4x4c,
+pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8,pos2ctx_map8x4,
+pos2ctx_map8x4, pos2ctx_map4x4,
+pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8,pos2ctx_map8x4,
+pos2ctx_map8x4,pos2ctx_map4x4}; // 22 entries, indexed by block type (pos2ctx_map[type])
+//--- interlace scan ---- (picked by read_significance_map when the picture is not a FRAME or the macroblock is field coded)
+//taken from ABT
+static const byte pos2ctx_map8x8i[] = { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
+6, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 11, 12, 11,
+9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 13, 13, 9,
+9, 10, 10, 8, 13, 13, 9, 9, 10, 10, 14, 14, 14, 14, 14, 14}; // 15 CTX
+static const byte pos2ctx_map8x4i[] = { 0, 1, 2, 3, 4, 5, 6, 3, 4, 5, 6, 3, 4, 7, 6, 8,
+9, 7, 6, 8, 9, 10, 11, 12, 12, 10, 11, 13, 13, 14, 14, 14}; // 15 CTX
+static const byte pos2ctx_map4x8i[] = { 0, 1, 1, 1, 2, 3, 3, 4, 4, 4, 5, 6, 2, 7, 7, 8,
+8, 8, 5, 6, 9, 10, 10, 11, 11, 11, 12, 13, 13, 14, 14, 14}; // 15 CTX
+static const byte* pos2ctx_map_int[] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i,
+pos2ctx_map4x8i,pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4,
+pos2ctx_map2x4c, pos2ctx_map4x4c,
+pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i,
+pos2ctx_map8x4i,pos2ctx_map4x4, // NOTE(review): slot 4 above uses pos2ctx_map4x8i, slot 14 here uses pos2ctx_map8x4i - confirm intended
+pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i,
+pos2ctx_map8x4i,pos2ctx_map4x4}; // 22 entries, indexed by block type; slot 20 also uses pos2ctx_map8x4i (see note above)
+
+//===== position -> ctx for LAST (read_significance_map uses these for frame and field blocks alike) =====
+static const byte pos2ctx_last8x8 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8}; // 9 CTX
+static const byte pos2ctx_last8x4 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
+3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8}; // 9 CTX
+
+static const byte pos2ctx_last4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; // 15 CTX (table itself lists 16 values, 0..15)
+static const byte pos2ctx_last2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX (only values 0..2 occur in this table)
+static const byte pos2ctx_last4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX (only values 0..2 occur in this table)
+static const byte* pos2ctx_last [] = {pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8, pos2ctx_last8x4,
+pos2ctx_last8x4, pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last4x4,
+pos2ctx_last2x4c, pos2ctx_last4x4c,
+pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8,pos2ctx_last8x4,
+pos2ctx_last8x4, pos2ctx_last4x4,
+pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8,pos2ctx_last8x4,
+pos2ctx_last8x4, pos2ctx_last4x4}; // 22 entries, indexed by block type (pos2ctx_last[type])
+
+
+
+/*!
+************************************************************************
+* \brief
+*    Read Significance MAP
+*
+*    Fills coeff[0 .. maxpos[type]] with 0/1 significance flags. For each
+*    position below maxpos[type] a significance symbol is decoded; after
+*    every significant position a "last" symbol is decoded, and when it
+*    is set the remaining entries are cleared. If no "last" symbol was
+*    set, the final position is taken to be significant without decoding.
+*
+* \return
+*    number of entries of coeff[] that were set to 1
+************************************************************************
+*/
+
+#if defined(_DEBUG) || defined(_M_X64)
+static int read_significance_map(TextureInfoContexts *tex_ctx, const Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type, int16_t coeff[])
+{
+  const VideoParameters *p_Vid = currMB->p_Vid;
+  const int last_pos    = maxpos[type];
+  const int field_coded = ( p_Vid->structure!=FRAME || currMB->mb_field );
+
+  const byte *sig_tab  = field_coded ? pos2ctx_map_int[type] : pos2ctx_map[type];
+  const byte *last_tab = pos2ctx_last[type];
+
+  BiContextTypePtr sig_ctx  = tex_ctx->map_contexts[field_coded][type2ctx_map [type]];
+  BiContextTypePtr last_ctx = tex_ctx->last_contexts[field_coded][type2ctx_last[type]];
+
+  int n_significant = 0;
+  int pos;
+
+  // block types flagged 0 in c1isdc[] skip the first table entry
+  if (!c1isdc[type])
+  {
+    ++sig_tab;
+    ++last_tab;
+  }
+
+  for (pos = 0; pos < last_pos; ++pos)
+  {
+    //--- read significance symbol ---
+    if (!biari_decode_symbol (dep_dp, sig_ctx + sig_tab[pos]))
+    {
+      coeff[pos] = 0;
+      continue;
+    }
+
+    coeff[pos] = 1;
+    ++n_significant;
+
+    //--- read last coefficient symbol ---
+    if (biari_decode_symbol (dep_dp, last_ctx + last_tab[pos]))
+    {
+      int rest;
+      for (rest = pos + 1; rest <= last_pos; ++rest)
+        coeff[rest] = 0;
+      return n_significant;
+    }
+  }
+
+  //--- last coefficient must be significant if no last symbol was received ---
+  coeff[pos] = 1;
+  return n_significant + 1;
+}
+#endif
+/*!
+************************************************************************
+* \brief
+* Read Levels
+************************************************************************
+*/
+#if defined(_DEBUG) || defined(_M_X64)
+/*!
+************************************************************************
+* \brief
+* Exp-Golomb decoding for LEVELS
+***********************************************************************
+*/
+unsigned int exp_golomb_decode_eq_prob( DecodingEnvironmentPtr dep_dp, int k);
+/*!
+ * Decode a level remainder: a unary prefix of at most exp_start bins read
+ * with the context model ctx, followed by a 0th-order Exp-Golomb suffix in
+ * bypass mode when all prefix bins are 1.
+ *
+ * The return value is the number of leading 1 bins (0 .. exp_start-1), or
+ * exp_start plus the Exp-Golomb suffix. The counter starts at 1 after the
+ * first 1 bin, in the same way as unary_exp_golomb_mv_decode; starting it
+ * at 0 made every value in 1 .. exp_start-1 come out one too small and
+ * left exp_start-1 unreachable.
+ */
+static unsigned int unary_exp_golomb_level_decode( DecodingEnvironmentPtr dep_dp,
+                                                   BiContextTypePtr ctx)
+{
+  const unsigned int exp_start = 13;
+  unsigned int symbol;
+
+  if (biari_decode_symbol(dep_dp, ctx ) == 0)
+    return 0;
+
+  // one 1 bin has been seen; up to exp_start-1 further prefix bins follow
+  for (symbol = 1; symbol < exp_start; symbol++)
+  {
+    if (!biari_decode_symbol(dep_dp, ctx))
+      return symbol;
+  }
+
+  return exp_start + exp_golomb_decode_eq_prob(dep_dp,0);
+}
+
+/*!
+ * Turn the 0/1 significance flags in coeff[] into signed levels.
+ *
+ * Positions are visited from maxpos[type] down to 0. For each non-zero
+ * entry one symbol is decoded with one_contexts[c1]; when the entry
+ * reaches 2 a further magnitude is added by
+ * unary_exp_golomb_level_decode() with abs_contexts[c2]. A bypass symbol
+ * finally selects the sign. c1 and c2 are stepped through small clip
+ * tables after every coefficient.
+ */
+static void read_significant_coefficients (TextureInfoContexts *tex_ctx,
+                                           DecodingEnvironmentPtr dep_dp,
+                                           int type,
+                                           int16_t coeff[])
+{
+  static const int step_clip4[5] = { 1, 2, 3, 4, 4 };
+  static const int step_clip3[4] = { 1, 2, 3, 3 };
+
+  const int *next_c2 = (max_c2[type]==4) ? step_clip4 : step_clip3;
+  BiContextType *one_ctx = tex_ctx->one_contexts[type2ctx_one[type]];
+  BiContextType *abs_ctx = tex_ctx->abs_contexts[type2ctx_abs[type]];
+  int c1 = 1;
+  int c2 = 0;
+  int pos;
+
+  for (pos = maxpos[type]; pos >= 0; --pos)
+  {
+    if (coeff[pos] == 0)
+      continue;
+
+    coeff[pos] += biari_decode_symbol (dep_dp, one_ctx + c1);
+
+    if (coeff[pos] == 2)
+    {
+      coeff[pos] += unary_exp_golomb_level_decode (dep_dp, abs_ctx + c2);
+      c2 = next_c2[c2];
+      c1 = 0;
+    }
+    else if (c1 != 0)
+    {
+      c1 = step_clip4[c1];
+    }
+
+    // sign
+    if (biari_decode_symbol_eq_prob(dep_dp))
+      coeff[pos] = -coeff[pos];
+  }
+}
+#else
+void read_significant_coefficients (TextureInfoContexts *tex_ctx,
+ DecodingEnvironmentPtr dep_dp,
+ int type,
+ int coeff[]);
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Read Block-Transform Coefficients
+*
+*    When the slice's coeff_ctr is negative a whole block is decoded
+*    first (coded-block flag, significance map, levels) into
+*    currSlice->coeff. Each call then hands out one (run, level) pair
+*    from that buffer; level 0 signals the end of the block.
+************************************************************************
+*/
+#if defined(_DEBUG) || defined(_M_X64)
+RunLevel readRunLevel_CABAC(Macroblock *currMB, DecodingEnvironmentPtr dep_dp, int context)
+{
+  Slice *slice = currMB->p_Slice;
+  RunLevel rl;
+
+  //--- read coefficients for whole block ---
+  if (slice->coeff_ctr < 0)
+  {
+    //===== decode CBP-BIT =====
+    slice->coeff_ctr = currMB->read_and_store_CBP_block_bit (currMB, dep_dp, context);
+    if (slice->coeff_ctr != 0)
+    {
+      //===== decode significance map =====
+      slice->coeff_ctr = read_significance_map (slice->tex_ctx, currMB, dep_dp, context, slice->coeff);
+
+      //===== decode significant coefficients =====
+      read_significant_coefficients (slice->tex_ctx, dep_dp, context, slice->coeff);
+    }
+  }
+
+  rl.run = 0;
+
+  // the counter is decremented on both paths
+  if (slice->coeff_ctr-- == 0)
+  {
+    //--- set run and level (EOB) ---
+    slice->pos = 0;
+    rl.level = 0;
+    return rl;
+  }
+
+  //--- set run and level (coefficient) ---
+  while (slice->coeff[slice->pos] == 0)
+  {
+    ++slice->pos;
+    ++rl.run;
+  }
+  rl.level = slice->coeff[slice->pos++];
+
+  //--- re-set position once the last coefficient was delivered ---
+  if (slice->coeff_ctr == 0)
+    slice->pos = 0;
+
+  return rl;
+}
+#endif
+/*!
+************************************************************************
+* \brief
+*    arideco_bits_read
+*
+* \return
+*    (*Dcodestrm_len * 8) - DbitsLeft of the given decoding environment
+************************************************************************
+*/
+static int arideco_bits_read(const DecodingEnvironmentPtr dep)
+{
+  const int bits_read = ((*dep->Dcodestrm_len) << 3) - dep->DbitsLeft;
+
+#if (2==TRACE)
+  fprintf(p_trace, "tmp: %d\n", bits_read);
+#endif
+  return bits_read;
+}
+
+/*!
+************************************************************************
+* \brief
+*    decoding of unary binarization using one or 2 distinct
+*    models for the first and all remaining bins; no terminating
+*    "0" for max_symbol
+*
+*    The first bin is read with ctx, all further bins with
+*    ctx + ctx_offset. At most max_symbol further bins are read; if all
+*    of them are 1 the result is max_symbol + 1.
+***********************************************************************
+*/
+static unsigned int unary_bin_max_decode(DecodingEnvironmentPtr dep_dp,
+                                         BiContextTypePtr ctx,
+                                         int ctx_offset,
+                                         unsigned int max_symbol)
+{
+  unsigned int symbol = biari_decode_symbol(dep_dp, ctx );
+
+  if (symbol==0 || (max_symbol == 0))
+    return symbol;
+
+  ctx += ctx_offset;
+  for (symbol = 1; ; ++symbol)
+  {
+    if (biari_decode_symbol(dep_dp, ctx) == 0)
+      return symbol;            // terminated by a 0 bin
+    if (symbol >= max_symbol)
+      return symbol + 1;        // all bins were 1: no terminating 0 is coded
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    decoding of unary binarization using one or 2 distinct
+*    models for the first and all remaining bins
+*
+*    The first bin is read with ctx, all further bins with
+*    ctx + ctx_offset, until a 0 bin is decoded.
+***********************************************************************
+*/
+static unsigned int unary_bin_decode(DecodingEnvironmentPtr dep_dp,
+                                     BiContextTypePtr ctx,
+                                     int ctx_offset)
+{
+  unsigned int symbol;
+
+  if (biari_decode_symbol(dep_dp, ctx ) == 0)
+    return 0;
+
+  ctx += ctx_offset;
+  symbol = 1;
+  while (biari_decode_symbol(dep_dp, ctx) != 0)
+    ++symbol;
+
+  return symbol;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    finding end of a slice in case this is not the end of a frame
+*
+*    When eos_bit is non-zero the terminating bin is decoded from the
+*    partition that carries SE_MBTYPE and returned; otherwise 0 is
+*    returned without touching the bitstream.
+*
+*    Historical note (StW, 8.7.02): it is unclear whether the "correction"
+*    made here solves an off-by-one problem or introduces one in some
+*    cases; with it the bit stream format works with CABAC again.
+************************************************************************
+*/
+int cabac_startcode_follows(Slice *currSlice, int eos_bit)
+{
+  unsigned int bit = 0;
+
+  if( eos_bit )
+  {
+    const byte *partMap = assignSE2partition[currSlice->dp_mode];
+    DataPartition *dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+    bit = biari_decode_final (&(dP->de_cabac)); //GB
+
+#if TRACE
+    fprintf(p_trace, "@%-6d %-63s (%3d)\n",symbolCount++, "end_of_slice_flag", bit);
+    fflush(p_trace);
+#endif
+  }
+
+  return bit;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Exp Golomb binarization and decoding of a symbol
+*    with prob. of 0.5
+*
+*    A unary prefix is read in bypass mode: every 1 bin adds 2^k to the
+*    result and raises the order k by one. After the terminating 0 bin,
+*    k further bypass bins are read MSB first as the binary suffix.
+*
+*    The prefix is limited to the bit width of unsigned int. A damaged
+*    stream could otherwise drive k past that width, and shifting by the
+*    width or more is undefined behaviour. All arithmetic is unsigned so
+*    that an overlong code wraps instead of overflowing a signed int.
+*
+* \param dep_dp
+*    decoding environment
+* \param k
+*    initial order of the code
+* \return
+*    decoded symbol
+************************************************************************
+*/
+unsigned int exp_golomb_decode_eq_prob( DecodingEnvironmentPtr dep_dp, int k)
+{
+  const int max_order = (int)(sizeof(unsigned int) * 8);
+  unsigned int symbol = 0;
+  unsigned int binary_symbol = 0;
+
+  // unary prefix
+  while (k < max_order && biari_decode_symbol_eq_prob(dep_dp) != 0)
+  {
+    symbol += (1u << k);
+    ++k;
+  }
+
+  // binary suffix, k bins
+  while (k--)
+    if (biari_decode_symbol_eq_prob(dep_dp)==1)
+      binary_symbol |= (1u << k);
+
+  return symbol + binary_symbol;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Exp-Golomb decoding for Motion Vectors
+*
+*    Unary prefix of up to exp_start bins; the context pointer advances
+*    after the first bin, again when the bin count reaches 2 and once
+*    more when it reaches max_bin. If every prefix bin is 1 a 3rd-order
+*    Exp-Golomb suffix is read in bypass mode and added to exp_start.
+***********************************************************************
+*/
+#if defined(_DEBUG) || defined(_M_X64)
+unsigned int unary_exp_golomb_mv_decode(DecodingEnvironmentPtr dep_dp,
+                                        BiContextTypePtr ctx,
+                                        unsigned int max_bin)
+{
+  const unsigned int exp_start = 8;
+  unsigned int bins = 1;
+
+  if (biari_decode_symbol(dep_dp, ctx ) == 0)
+    return 0;
+
+  ++ctx;
+  while (bins < exp_start)
+  {
+    if (!biari_decode_symbol(dep_dp, ctx))
+      return bins;
+
+    ++bins;
+    if (bins == 2)
+      ++ctx;
+    if (bins == max_bin)
+      ++ctx;
+  }
+
+  return exp_start + exp_golomb_decode_eq_prob(dep_dp,3);
+}
+/*!
+ * Same decoding as unary_exp_golomb_mv_decode with max_bin fixed to 3.
+ */
+unsigned int unary_exp_golomb_mv_decode3(DecodingEnvironmentPtr dep_dp,
+                                         BiContextTypePtr ctx)
+{
+  const unsigned int max_bin   = 3;
+  const unsigned int exp_start = 8;
+  unsigned int bins;
+
+  if (biari_decode_symbol(dep_dp, ctx ) == 0)
+    return 0;
+
+  ++ctx;
+  bins = 1;
+  do
+  {
+    if (!biari_decode_symbol(dep_dp, ctx))
+      return bins;
+
+    ++bins;
+    if (bins == 2)
+      ++ctx;
+    if (bins == max_bin)
+      ++ctx;
+  }
+  while (bins < exp_start);
+
+  return exp_start + exp_golomb_decode_eq_prob(dep_dp,3);
+}
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Read I_PCM macroblock
+*
+*    Whole bytes still counted in DbitsLeft are handed back to the byte
+*    counter of the decoding environment, then the raw samples are read
+*    with GetBits() straight from the stream buffer into currSlice->ipcm:
+*    MB_BLOCK_SIZE x MB_BLOCK_SIZE luma samples, followed (unless the
+*    picture is YUV400 or planes are coded independently) by both chroma
+*    planes. Finally the byte counter is advanced by the number of bits
+*    reported by GetBits(), rounded up to whole bytes.
+************************************************************************
+*/
+void readIPCM_CABAC(Slice *currSlice, struct datapartition *dP)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  Bitstream* currStream = dP->bitstream;
+  DecodingEnvironmentPtr dep = &(dP->de_cabac);
+  byte *buf = currStream->streamBuffer;
+  int BitstreamLengthInBits = (dP->bitstream->bitstream_length << 3) + 7;
+
+  int sample = 0;
+  int bits_read = 0;
+  int bitoffset, bitdepth;
+  int plane, row, col;
+
+  // give back complete bytes that have not been consumed yet
+  while (dep->DbitsLeft >= 8)
+  {
+    dep->Dvalue >>= 8;
+    dep->DbitsLeft -= 8;
+    (*dep->Dcodestrm_len)--;
+  }
+
+  bitoffset = (*dep->Dcodestrm_len) << 3;
+
+  // read luma values
+  bitdepth = p_Vid->bitdepth_luma;
+  for (row = 0; row < MB_BLOCK_SIZE; ++row)
+  {
+    for (col = 0; col < MB_BLOCK_SIZE; ++col)
+    {
+      bits_read += GetBits(buf, bitoffset, &sample, BitstreamLengthInBits, bitdepth);
+      currSlice->ipcm[0][row][col] = sample;
+      bitoffset += bitdepth;
+    }
+  }
+
+  // read chroma values
+  bitdepth = p_Vid->bitdepth_chroma;
+  if ((dec_picture->chroma_format_idc != YUV400) && !IS_INDEPENDENT(p_Vid))
+  {
+    for (plane = 1; plane < 3; ++plane)
+    {
+      for (row = 0; row < p_Vid->mb_cr_size_y; ++row)
+      {
+        for (col = 0; col < p_Vid->mb_cr_size_x; ++col)
+        {
+          bits_read += GetBits(buf, bitoffset, &sample, BitstreamLengthInBits, bitdepth);
+          currSlice->ipcm[plane][row][col] = sample;
+          bitoffset += bitdepth;
+        }
+      }
+    }
+  }
+
+  // advance the byte counter, rounding a partial byte up
+  (*dep->Dcodestrm_len) += ( bits_read >> 3);
+  if ((bits_read & 7) != 0)
+    ++(*dep->Dcodestrm_len);
+}
+
diff --git a/Src/h264dec/ldecod/src/context_ini.c b/Src/h264dec/ldecod/src/context_ini.c
new file mode 100644
index 00000000..2ffcfeef
--- /dev/null
+++ b/Src/h264dec/ldecod/src/context_ini.c
@@ -0,0 +1,123 @@
+
+/*!
+ *************************************************************************************
+ * \file context_ini.c
+ *
+ * \brief
+ * CABAC context initializations
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Detlev Marpe <marpe@hhi.de>
+ * - Heiko Schwarz <hschwarz@hhi.de>
+ **************************************************************************************
+ */
+
+#define CONTEXT_INI_C
+
+#include "defines.h"
+#include "global.h"
+#include "biaridecod.h"
+#include "ctx_tables.h"
+
+
+#define IBIARI_CTX_INIT2(ii,jj,ctx,tab,num, qp) \
+{ \
+ for (i=0; i<ii; ++i) \
+ for (j=0; j<jj; ++j) \
+ { \
+ biari_init_context (qp, &(ctx[i][j]), tab ## _I[num][i][j]); \
+ } \
+}
+
+#define PBIARI_CTX_INIT2(ii,jj,ctx,tab,num, qp) \
+{ \
+ for (i=0; i<ii; ++i) \
+ for (j=0; j<jj; ++j) \
+ { \
+ biari_init_context (qp, &(ctx[i][j]), tab ## _P[num][i][j]); \
+ } \
+}
+
+
+#define IBIARI_CTX_INIT1(jj,ctx,tab,num, qp) \
+{ \
+ for (j=0; j<jj; ++j) \
+ { \
+ biari_init_context (qp, &(ctx[j]), tab ## _I[num][0][j]); \
+ } \
+}
+
+
+#define PBIARI_CTX_INIT1(jj,ctx,tab,num, qp) \
+{ \
+ { \
+ for (j=0; j<jj; ++j) \
+ { \
+ biari_init_context (qp, &(ctx[j]), tab ## _P[num][0][j]); \
+ } \
+ } \
+}
+
+/*!
+ * Initialise all motion and texture context models of a slice.
+ *
+ * I and SI slices take their start values from the *_I tables, every
+ * other slice type from the *_P tables; both are indexed by the slice's
+ * model_number. The QP handed to biari_init_context is p_Vid->qp,
+ * clamped to be non-negative.
+ */
+void init_contexts (Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  MotionInfoContexts* mc = currSlice->mot_ctx;
+  TextureInfoContexts* tc = currSlice->tex_ctx;
+  int i, j;  // loop variables used inside the *BIARI_CTX_INIT* macros
+  int qp = imax(0, p_Vid->qp);
+  int model_number = currSlice->model_number;
+  const int intra_slice = (currSlice->slice_type == I_SLICE) || (currSlice->slice_type == SI_SLICE);
+
+  if (!intra_slice)
+  {
+    //--- motion coding contexts ---
+    PBIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX, mc->mb_type_contexts, INIT_MB_TYPE, model_number, qp);
+    PBIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX, mc->b8_type_contexts, INIT_B8_TYPE, model_number, qp);
+    PBIARI_CTX_INIT2 (2, NUM_MV_RES_CTX, mc->mv_res_contexts, INIT_MV_RES, model_number, qp);
+    PBIARI_CTX_INIT2 (2, NUM_REF_NO_CTX, mc->ref_no_contexts, INIT_REF_NO, model_number, qp);
+    PBIARI_CTX_INIT1 ( NUM_DELTA_QP_CTX, mc->delta_qp_contexts, INIT_DELTA_QP, model_number, qp);
+    PBIARI_CTX_INIT1 ( NUM_MB_AFF_CTX, mc->mb_aff_contexts, INIT_MB_AFF, model_number, qp);
+
+    //--- texture coding contexts ---
+    PBIARI_CTX_INIT1 ( NUM_TRANSFORM_SIZE_CTX, tc->transform_size_contexts, INIT_TRANSFORM_SIZE, model_number, qp);
+    PBIARI_CTX_INIT1 ( NUM_IPR_CTX, tc->ipr_contexts, INIT_IPR, model_number, qp);
+    PBIARI_CTX_INIT1 ( NUM_CIPR_CTX, tc->cipr_contexts, INIT_CIPR, model_number, qp);
+    PBIARI_CTX_INIT2 (3, NUM_CBP_CTX, tc->cbp_contexts, INIT_CBP, model_number, qp);
+    PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tc->bcbp_contexts, INIT_BCBP, model_number, qp);
+    PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[0], INIT_MAP, model_number, qp);
+#if ENABLE_FIELD_CTX
+    PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[1], INIT_FLD_MAP, model_number, qp);
+    PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[1], INIT_FLD_LAST, model_number, qp);
+#endif
+    PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[0], INIT_LAST, model_number, qp);
+    PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX, tc->one_contexts, INIT_ONE, model_number, qp);
+    PBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX, tc->abs_contexts, INIT_ABS, model_number, qp);
+  }
+  else
+  {
+    //--- motion coding contexts ---
+    IBIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX, mc->mb_type_contexts, INIT_MB_TYPE, model_number, qp);
+    IBIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX, mc->b8_type_contexts, INIT_B8_TYPE, model_number, qp);
+    IBIARI_CTX_INIT2 (2, NUM_MV_RES_CTX, mc->mv_res_contexts, INIT_MV_RES, model_number, qp);
+    IBIARI_CTX_INIT2 (2, NUM_REF_NO_CTX, mc->ref_no_contexts, INIT_REF_NO, model_number, qp);
+    IBIARI_CTX_INIT1 ( NUM_DELTA_QP_CTX, mc->delta_qp_contexts, INIT_DELTA_QP, model_number, qp);
+    IBIARI_CTX_INIT1 ( NUM_MB_AFF_CTX, mc->mb_aff_contexts, INIT_MB_AFF, model_number, qp);
+
+    //--- texture coding contexts ---
+    IBIARI_CTX_INIT1 ( NUM_TRANSFORM_SIZE_CTX, tc->transform_size_contexts, INIT_TRANSFORM_SIZE, model_number, qp);
+    IBIARI_CTX_INIT1 ( NUM_IPR_CTX, tc->ipr_contexts, INIT_IPR, model_number, qp);
+    IBIARI_CTX_INIT1 ( NUM_CIPR_CTX, tc->cipr_contexts, INIT_CIPR, model_number, qp);
+    IBIARI_CTX_INIT2 (3, NUM_CBP_CTX, tc->cbp_contexts, INIT_CBP, model_number, qp);
+    IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tc->bcbp_contexts, INIT_BCBP, model_number, qp);
+    IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[0], INIT_MAP, model_number, qp);
+#if ENABLE_FIELD_CTX
+    IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts[1], INIT_FLD_MAP, model_number, qp);
+    IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[1], INIT_FLD_LAST, model_number, qp);
+#endif
+    IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts[0], INIT_LAST, model_number, qp);
+    IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX, tc->one_contexts, INIT_ONE, model_number, qp);
+    IBIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX, tc->abs_contexts, INIT_ABS, model_number, qp);
+  }
+}
+
diff --git a/Src/h264dec/ldecod/src/erc_api.c b/Src/h264dec/ldecod/src/erc_api.c
new file mode 100644
index 00000000..48e827a0
--- /dev/null
+++ b/Src/h264dec/ldecod/src/erc_api.c
@@ -0,0 +1,371 @@
+
+/*!
+ *************************************************************************************
+ * \file erc_api.c
+ *
+ * \brief
+ * External (still inside video decoder) interface for error concealment module
+ *
+ * \author
+ * - Ari Hourunranta <ari.hourunranta@nokia.com>
+ * - Viktor Varsa <viktor.varsa@nokia.com>
+ * - Ye-Kui Wang <wyk@ieee.org>
+ *
+ *************************************************************************************
+ */
+
+
+#include "global.h"
+#include "memalloc.h"
+#include "erc_api.h"
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initialize the error concealment module
+ *
+ *    Any previous concealment state attached to p_Vid is released, a new
+ *    object list with (pic_sizex * pic_sizey) >> 6 entries is allocated
+ *    zero-filled, a fresh ercVariables_t is opened and concealment is
+ *    switched on or off according to flag.
+ ************************************************************************
+ */
+void ercInit(VideoParameters *p_Vid, int pic_sizex, int pic_sizey, int flag)
+{
+  ercClose(p_Vid, p_Vid->erc_errorVar);
+  // ercClose() freed the instance; do not keep a dangling pointer around
+  // while the allocations below can still fail
+  p_Vid->erc_errorVar = NULL;
+
+  p_Vid->erc_object_list = (objectBuffer_t *) calloc((pic_sizex * pic_sizey) >> 6, sizeof(objectBuffer_t));
+  if (p_Vid->erc_object_list == NULL) no_mem_exit("ercInit: erc_object_list");
+
+  // the error concealment instance is allocated
+  p_Vid->erc_errorVar = ercOpen();
+
+  // set error concealment ON
+  ercSetErrorConcealment(p_Vid->erc_errorVar, flag);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *      Allocates data structures used in error concealment.
+ *
+ *      The structure is allocated zero-filled so that fields which are
+ *      not assigned below (nOfSegments, nOfCorruptedSegments,
+ *      currSegmentCorrupted) do not start with indeterminate values.
+ *      The per-frame buffers stay NULL until ercReset() allocates them;
+ *      concealment is enabled by default.
+ *\return
+ *      The allocated ercVariables_t is returned.
+ ************************************************************************
+ */
+ercVariables_t *ercOpen( void )
+{
+  ercVariables_t *errorVar = NULL;
+
+  errorVar = (ercVariables_t *)calloc( 1, sizeof(ercVariables_t));
+  if ( errorVar == NULL ) no_mem_exit("ercOpen: errorVar");
+
+  errorVar->nOfMBs = 0;
+  errorVar->segments = NULL;
+  errorVar->currSegment = 0;
+  errorVar->yCondition = NULL;
+  errorVar->uCondition = NULL;
+  errorVar->vCondition = NULL;
+  errorVar->prevFrameYCondition = NULL;
+
+  errorVar->concealment = 1;
+
+  return errorVar;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *      Resets the variables used in error detection.
+ *      Should be called always when starting to decode a new frame.
+ *
+ *      Does nothing when errorVar is NULL or concealment is switched off.
+ *      Buffers are (re)allocated on the first call and whenever nOfMBs
+ *      changes; otherwise the luma condition buffers of the current and
+ *      the previous frame are swapped. All segments are then marked
+ *      corrupted and set to span the whole frame.
+ *
+ *      A newly allocated prevFrameYCondition buffer is zero-filled; it
+ *      used to be left uninitialized until the second frame.
+ * \param errorVar
+ *      Variables for error concealment
+ * \param nOfMBs
+ *      Number of macroblocks in a frame
+ * \param numOfSegments
+ *      Estimated number of segments (memory reserved)
+ * \param picSizeX
+ *      Width of the frame in pixels (not used by this function).
+ ************************************************************************
+ */
+void ercReset( ercVariables_t *errorVar, int nOfMBs, int numOfSegments, int picSizeX )
+{
+  int i;
+
+  if ( errorVar == NULL || !errorVar->concealment )
+    return;
+
+  // If frame size has been changed
+  if ( nOfMBs != errorVar->nOfMBs && errorVar->yCondition != NULL )
+  {
+    free( errorVar->yCondition );
+    errorVar->yCondition = NULL;
+    free( errorVar->prevFrameYCondition );
+    errorVar->prevFrameYCondition = NULL;
+    free( errorVar->uCondition );
+    errorVar->uCondition = NULL;
+    free( errorVar->vCondition );
+    errorVar->vCondition = NULL;
+    free( errorVar->segments );
+    errorVar->segments = NULL;
+  }
+
+  // If the structures are uninitialized (first frame, or frame size is changed)
+  if ( errorVar->yCondition == NULL )
+  {
+    errorVar->segments = (ercSegment_t *)malloc( numOfSegments*sizeof(ercSegment_t) );
+    if ( errorVar->segments == NULL ) no_mem_exit("ercReset: errorVar->segments");
+    memset( errorVar->segments, 0, numOfSegments*sizeof(ercSegment_t));
+    errorVar->nOfSegments = numOfSegments;
+
+    errorVar->yCondition = (int *)malloc( 4*nOfMBs*sizeof(int) );
+    if ( errorVar->yCondition == NULL ) no_mem_exit("ercReset: errorVar->yCondition");
+    // zero-filled: there is no previous frame yet, so nothing else would initialize it
+    errorVar->prevFrameYCondition = (int *)calloc( 4*nOfMBs, sizeof(int) );
+    if ( errorVar->prevFrameYCondition == NULL ) no_mem_exit("ercReset: errorVar->prevFrameYCondition");
+    errorVar->uCondition = (int *)malloc( nOfMBs*sizeof(int) );
+    if ( errorVar->uCondition == NULL ) no_mem_exit("ercReset: errorVar->uCondition");
+    errorVar->vCondition = (int *)malloc( nOfMBs*sizeof(int) );
+    if ( errorVar->vCondition == NULL ) no_mem_exit("ercReset: errorVar->vCondition");
+    errorVar->nOfMBs = nOfMBs;
+  }
+  else
+  {
+    // Store the yCondition struct of the previous frame
+    int *tmp = errorVar->prevFrameYCondition;
+    errorVar->prevFrameYCondition = errorVar->yCondition;
+    errorVar->yCondition = tmp;
+  }
+
+  // Reset tables and parameters
+  memset( errorVar->yCondition, 0, 4*nOfMBs*sizeof(*errorVar->yCondition));
+  memset( errorVar->uCondition, 0, nOfMBs*sizeof(*errorVar->uCondition));
+  memset( errorVar->vCondition, 0, nOfMBs*sizeof(*errorVar->vCondition));
+
+  if (errorVar->nOfSegments != numOfSegments)
+  {
+    free( errorVar->segments );
+    errorVar->segments = NULL;
+    errorVar->segments = (ercSegment_t *)malloc( numOfSegments*sizeof(ercSegment_t) );
+    if ( errorVar->segments == NULL ) no_mem_exit("ercReset: errorVar->segments");
+    errorVar->nOfSegments = numOfSegments;
+  }
+
+  memset( errorVar->segments, 0, errorVar->nOfSegments*sizeof(ercSegment_t));
+
+  for ( i = 0; i < errorVar->nOfSegments; i++ )
+  {
+    errorVar->segments[i].fCorrupted = 1; //! mark segments as corrupted
+    errorVar->segments[i].startMBPos = 0;
+    errorVar->segments[i].endMBPos = nOfMBs - 1;
+  }
+
+  errorVar->currSegment = 0;
+  errorVar->nOfCorruptedSegments = 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *      Releases the error concealment data.
+ *      Frees the per-frame buffers of errorVar (when they were allocated)
+ *      and errorVar itself, then frees p_Vid->erc_object_list and sets
+ *      that pointer to NULL. The caller's own copy of the errorVar
+ *      pointer is not modified.
+ * \param p_Vid
+ *      VideoParameters variable (may be NULL)
+ * \param errorVar
+ *      Variables for error concealment (may be NULL)
+ ************************************************************************
+ */
+void ercClose(VideoParameters *p_Vid, ercVariables_t *errorVar )
+{
+  if ( errorVar )
+  {
+    // the buffers are allocated together, yCondition tells whether they exist
+    if ( errorVar->yCondition )
+    {
+      free( errorVar->segments );
+      free( errorVar->yCondition );
+      free( errorVar->uCondition );
+      free( errorVar->vCondition );
+      free( errorVar->prevFrameYCondition );
+    }
+    free( errorVar );
+  }
+
+  if ( p_Vid != NULL && p_Vid->erc_object_list != NULL )
+  {
+    free( p_Vid->erc_object_list );
+    p_Vid->erc_object_list = NULL;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *      Sets error concealment ON/OFF. Can be invoked only between frames, not during a frame
+ *      A NULL errorVar is ignored.
+ * \param errorVar
+ *      Variables for error concealment
+ * \param value
+ *      New value
+ ************************************************************************
+ */
+void ercSetErrorConcealment( ercVariables_t *errorVar, int value )
+{
+  if ( errorVar == NULL )
+    return;
+
+  errorVar->concealment = value;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Opens a segment in the segment list: clears the "current segment
+ *    corrupted" flag, marks the segment as not corrupted and records its
+ *    first MB. Does nothing when concealment is disabled.
+ * \note
+ *    The segment list is neither grown nor range-checked here, and the end
+ *    of the previous segment is not touched; the caller has to keep
+ *    "segment" inside the list and call ercStopSegment itself.
+ * \param currMBNum
+ *    The MB number where the new slice/segment starts
+ * \param segment
+ *    Segment/Slice No. counted by the caller (index into the segment list)
+ * \param bitPos
+ *    Bitstream pointer: number of bits read from the buffer (currently unused)
+ * \param errorVar
+ *    Variables for error detector
+ ************************************************************************
+ */
+void ercStartSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar )
+{
+  if ( errorVar == NULL || !errorVar->concealment )
+    return;
+
+  errorVar->currSegmentCorrupted = 0;
+
+  errorVar->segments[ segment ].startMBPos = currMBNum;
+  errorVar->segments[ segment ].fCorrupted = 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Records the last MB of a segment and advances the internal segment
+ *    counter (errorVar->currSegment). Does nothing when concealment is
+ *    disabled.
+ * \note
+ *    "segment" is used directly as an index into the segment list; negative
+ *    values are NOT mapped to the internal counter by this implementation.
+ * \param currMBNum
+ *    The last MB number of the segment being closed
+ * \param segment
+ *    Segment/Slice No. counted by the caller (index into the segment list)
+ * \param bitPos
+ *    Bitstream pointer: number of bits read from the buffer (currently unused)
+ * \param errorVar
+ *    Variables for error detector
+ ************************************************************************
+ */
+void ercStopSegment( int currMBNum, int segment, unsigned int bitPos, ercVariables_t *errorVar )
+{
+  if ( errorVar == NULL || !errorVar->concealment )
+    return;
+
+  errorVar->segments[ segment ].endMBPos = currMBNum;
+  errorVar->currSegment += 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Marks the most recently closed segment (index currSegment-1) as lost:
+ *    the four Y blocks and the U and V entries of every MB between the
+ *    segment's start and end MB are set to ERC_BLOCK_CORRUPTED. The counter
+ *    of corrupted segments is increased once per segment.
+ * \note
+ *    Expects that ercStopSegment has been called at least once, otherwise
+ *    currSegment-1 is not a valid index.
+ * \param picSizeX
+ *    Width of the frame in pixels.
+ * \param errorVar
+ *    Variables for error detector
+ ************************************************************************
+ */
+void ercMarkCurrSegmentLost(int picSizeX, ercVariables_t *errorVar )
+{
+  int segIdx, mb, blk;
+
+  if ( errorVar == NULL || !errorVar->concealment )
+    return;
+
+  segIdx = errorVar->currSegment - 1;
+
+  // count each segment only once, however often it is reported as lost
+  if ( !errorVar->currSegmentCorrupted )
+  {
+    errorVar->nOfCorruptedSegments++;
+    errorVar->currSegmentCorrupted = 1;
+  }
+
+  for ( mb = errorVar->segments[segIdx].startMBPos; mb <= errorVar->segments[segIdx].endMBPos; mb++ )
+  {
+    for ( blk = 0; blk < 4; blk++ )
+      errorVar->yCondition[MBNum2YBlock (mb, blk, picSizeX)] = ERC_BLOCK_CORRUPTED;
+
+    errorVar->uCondition[mb] = ERC_BLOCK_CORRUPTED;
+    errorVar->vCondition[mb] = ERC_BLOCK_CORRUPTED;
+  }
+
+  errorVar->segments[segIdx].fCorrupted = 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Marks the most recently closed segment (index currSegment-1) as OK:
+ *    the four Y blocks and the U and V entries of every MB between the
+ *    segment's start and end MB are set to ERC_BLOCK_OK and the segment's
+ *    corrupted flag is cleared.
+ * \note
+ *    Expects that ercStopSegment has been called at least once, otherwise
+ *    currSegment-1 is not a valid index.
+ * \param picSizeX
+ *    Width of the frame in pixels.
+ * \param errorVar
+ *    Variables for error detector
+ ************************************************************************
+ */
+void ercMarkCurrSegmentOK(int picSizeX, ercVariables_t *errorVar )
+{
+  int segIdx, mb, blk;
+
+  if ( errorVar == NULL || !errorVar->concealment )
+    return;
+
+  segIdx = errorVar->currSegment - 1;
+
+  for ( mb = errorVar->segments[segIdx].startMBPos; mb <= errorVar->segments[segIdx].endMBPos; mb++ )
+  {
+    for ( blk = 0; blk < 4; blk++ )
+      errorVar->yCondition[MBNum2YBlock (mb, blk, picSizeX)] = ERC_BLOCK_OK;
+
+    errorVar->uCondition[mb] = ERC_BLOCK_OK;
+    errorVar->vCondition[mb] = ERC_BLOCK_OK;
+  }
+
+  errorVar->segments[segIdx].fCorrupted = 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Marks the blocks of the given component (YUV) of one MB as concealed
+ *    (ERC_BLOCK_CONCEALED) in the condition tables. Does nothing when
+ *    concealment is disabled or when comp is greater than 2.
+ * \param currMBNum
+ *    Number of the MB whose blocks are marked
+ * \param comp
+ *    Component to mark (0:Y, 1:U, 2:V, <0:All)
+ * \param picSizeX
+ *    Width of the frame in pixels.
+ * \param errorVar
+ *    Variables for error detector
+ ************************************************************************
+ */
+void ercMarkCurrMBConcealed( int currMBNum, int comp, int picSizeX, ercVariables_t *errorVar )
+{
+  int markAll, blk;
+
+  if ( errorVar == NULL || !errorVar->concealment )
+    return;
+
+  markAll = (comp < 0);
+
+  if ( markAll || comp == 0 )
+  {
+    // a luma MB covers four 8x8 entries of the Y condition table
+    for ( blk = 0; blk < 4; blk++ )
+      errorVar->yCondition[MBNum2YBlock (currMBNum, blk, picSizeX)] = ERC_BLOCK_CONCEALED;
+  }
+
+  if ( markAll || comp == 1 )
+    errorVar->uCondition[currMBNum] = ERC_BLOCK_CONCEALED;
+
+  if ( markAll || comp == 2 )
+    errorVar->vCondition[currMBNum] = ERC_BLOCK_CONCEALED;
+}
diff --git a/Src/h264dec/ldecod/src/erc_do_i.c b/Src/h264dec/ldecod/src/erc_do_i.c
new file mode 100644
index 00000000..55d2a38f
--- /dev/null
+++ b/Src/h264dec/ldecod/src/erc_do_i.c
@@ -0,0 +1,544 @@
+
+/*!
+ *************************************************************************************
+ * \file
+ * erc_do_i.c
+ *
+ * \brief
+ * Intra (I) frame error concealment algorithms for decoder
+ *
+ * \author
+ * - Ari Hourunranta <ari.hourunranta@nokia.com>
+ * - Viktor Varsa <viktor.varsa@nokia.com>
+ * - Ye-Kui Wang <wyk@ieee.org>
+ *
+ *************************************************************************************
+ */
+
+#include "global.h"
+#include "erc_do.h"
+
+// Forward declarations of the file-local helpers defined further down in this file.
+// concealBlocks: scans one component's condition table and conceals the damaged blocks.
+static void concealBlocks ( VideoParameters *p_Vid, int lastColumn, int lastRow, int comp, frame *recfr, int picSizeX, int *condition );
+// pixMeanInterpolateBlock: fills one block with a weighted average of its neighbours' border pixels.
+static void pixMeanInterpolateBlock( VideoParameters *p_Vid, imgpel *src[], imgpel *block, int blockSize, int frameWidth );
+
+/*!
+ ************************************************************************
+ * \brief
+ *    The main function for Intra frame concealment.
+ *    When at least one corrupted segment was reported, "concealBlocks" is
+ *    run for each color component (Y,U,V) separately. The Y table is
+ *    scanned in 8x8 blocks (picSize>>3); the U and V tables use picSize>>4
+ *    entries per direction, i.e. half the luma dimensions.
+ * \return
+ *    0, if concealment is switched off (or errorVar is NULL) and simple
+ *       concealment should be used
+ *    1, otherwise (even if none of the blocks were concealed)
+ * \param p_Vid
+ *    image encoding parameters for current picture
+ * \param recfr
+ *    Reconstructed frame buffer
+ * \param picSizeX
+ *    Width of the frame in pixels
+ * \param picSizeY
+ *    Height of the frame in pixels
+ * \param errorVar
+ *    Variables for error concealment
+ ************************************************************************
+ */
+int ercConcealIntraFrame( VideoParameters *p_Vid, frame *recfr, int picSizeX, int picSizeY, ercVariables_t *errorVar )
+{
+  int blockRows, blockColumns;
+
+  if ( errorVar == NULL || !errorVar->concealment )
+    return 0;
+
+  if ( errorVar->nOfCorruptedSegments )
+  {
+    // Y: one table entry per 8x8 block
+    blockRows    = (int) (picSizeY>>3);
+    blockColumns = (int) (picSizeX>>3);
+    concealBlocks( p_Vid, blockColumns, blockRows, 0, recfr, picSizeX, errorVar->yCondition );
+
+    // U and V share the same (halved) table dimensions
+    blockRows    = (int) (picSizeY>>4);
+    blockColumns = (int) (picSizeX>>4);
+    concealBlocks( p_Vid, blockColumns, blockRows, 1, recfr, picSizeX, errorVar->uCondition );
+    concealBlocks( p_Vid, blockColumns, blockRows, 2, recfr, picSizeX, errorVar->vCondition );
+  }
+
+  return 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Conceals the MB at position (row, column) by interpolating it from its
+ *    usable neighbors with pixMeanInterpolateBlock(). For every non-zero
+ *    entry of predBlocks[] the address of that neighbor inside currFrame is
+ *    collected; unusable neighbors stay NULL.
+ *    Neighbor numbering (x is the current MB):
+ *          1 4 0
+ *          5 x 7
+ *          2 6 3
+ * \param p_Vid
+ *    image encoding parameters for current picture
+ * \param currFrame
+ *    current frame (one component plane)
+ * \param row
+ *    y coordinate in blocks
+ * \param column
+ *    x coordinate in blocks
+ * \param predBlocks[]
+ *    list of neighboring source blocks (numbering 0 to 7, non-zero means: use the neighbor)
+ * \param frameWidth
+ *    width of frame in pixels
+ * \param mbWidthInBlocks
+ *    2 for Y, 1 for U/V components
+ ************************************************************************
+ */
+void ercPixConcealIMB(VideoParameters *p_Vid, imgpel *currFrame, int row, int column, int predBlocks[], int frameWidth, int mbWidthInBlocks)
+{
+  // direction of each neighbor relative to the current MB, in MB units
+  static const int rowDir[8] = { -1, -1,  1,  1, -1,  0,  1,  0 };
+  static const int colDir[8] = {  1, -1, -1,  1,  0, -1,  0,  1 };
+  imgpel *src[8];
+  int n;
+
+  for ( n = 0; n < 8; n++ )
+  {
+    if ( predBlocks[n] )
+      src[n] = currFrame + (row + rowDir[n]*mbWidthInBlocks)*frameWidth*8 + (column + colDir[n]*mbWidthInBlocks)*8;
+    else
+      src[n] = NULL;
+  }
+
+  pixMeanInterpolateBlock( p_Vid, src, currFrame + row*frameWidth*8 + column*8, mbWidthInBlocks*8, frameWidth );
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    This function checks the neighbors of a Macroblock for usability in
+ *    concealment. In a first round only neighbors in state ERC_BLOCK_OK are
+ *    accepted; if fewer than the required number (2 without corners, 4 with
+ *    corners) are found, a second round also accepts ERC_BLOCK_CONCEALED.
+ *    For every accepted neighbor its condition value (non-zero) is stored in
+ *    the output array, all other entries are 0.
+ *    The block order in "predBlocks":
+ *          1 4 0
+ *          5 x 7
+ *          2 6 3
+ *    i.e., corners first.
+ * \return
+ *    Number of useable neighbor macroblocks found in the last round.
+ * \param predBlocks[]
+ *    Array (8 entries) for indicating the valid neighbor blocks
+ * \param currRow
+ *    Current block row in the frame
+ * \param currColumn
+ *    Current block column in the frame
+ * \param condition
+ *    The block condition (ok, lost) table
+ * \param maxRow
+ *    Number of block rows in the frame
+ * \param maxColumn
+ *    Number of block columns in the frame
+ * \param step
+ *    Number of blocks belonging to a MB, when counting
+ *    in vertical/horizontal direction. (Y:2 U,V:1)
+ * \param fNoCornerNeigh
+ *    No corner neighbors are considered
+ ************************************************************************
+ */
+int ercCollect8PredBlocks( int predBlocks[], int currRow, int currColumn, int *condition,
+                           int maxRow, int maxColumn, int step, byte fNoCornerNeigh )
+{
+  const int minNeeded = (fNoCornerNeigh ? 2 : 4);
+  // which neighbors exist inside the frame
+  const int hasTop    = (currRow > 0);
+  const int hasBottom = (currRow < (maxRow-step));
+  const int hasLeft   = (currColumn > 0);
+  const int hasRight  = (currColumn < (maxColumn-step));
+  // table offsets of the neighboring rows / columns
+  const int rowAbove  = (currRow-1)*maxColumn;
+  const int rowHere   = currRow*maxColumn;
+  const int rowBelow  = (currRow+step)*maxColumn;
+  const int colLeft   = currColumn - 1;
+  const int colRight  = currColumn + step;
+  int threshold = ERC_BLOCK_OK;
+  int found, state;
+
+  memset( predBlocks, 0, 8*sizeof(int) );
+
+  do
+  {
+    found = 0;
+
+    // top
+    if ( hasTop )
+    {
+      state = condition[ rowAbove + currColumn ];
+      if ( state >= threshold )
+      {
+        predBlocks[4] = state;
+        found++;
+      }
+    }
+    // bottom
+    if ( hasBottom )
+    {
+      state = condition[ rowBelow + currColumn ];
+      if ( state >= threshold )
+      {
+        predBlocks[6] = state;
+        found++;
+      }
+    }
+
+    if ( hasLeft )
+    {
+      // left
+      state = condition[ rowHere + colLeft ];
+      if ( state >= threshold )
+      {
+        predBlocks[5] = state;
+        found++;
+      }
+
+      if ( !fNoCornerNeigh )
+      {
+        // top-left
+        if ( hasTop )
+        {
+          state = condition[ rowAbove + colLeft ];
+          if ( state >= threshold )
+          {
+            predBlocks[1] = state;
+            found++;
+          }
+        }
+        // bottom-left
+        if ( hasBottom )
+        {
+          state = condition[ rowBelow + colLeft ];
+          if ( state >= threshold )
+          {
+            predBlocks[2] = state;
+            found++;
+          }
+        }
+      }
+    }
+
+    if ( hasRight )
+    {
+      // right
+      state = condition[ rowHere + colRight ];
+      if ( state >= threshold )
+      {
+        predBlocks[7] = state;
+        found++;
+      }
+
+      if ( !fNoCornerNeigh )
+      {
+        // top-right
+        if ( hasTop )
+        {
+          state = condition[ rowAbove + colRight ];
+          if ( state >= threshold )
+          {
+            predBlocks[0] = state;
+            found++;
+          }
+        }
+        // bottom-right
+        if ( hasBottom )
+        {
+          state = condition[ rowBelow + colRight ];
+          if ( state >= threshold )
+          {
+            predBlocks[3] = state;
+            found++;
+          }
+        }
+      }
+    }
+
+    // relax the acceptance level for the next round
+    threshold--;
+  } while ( threshold >= ERC_BLOCK_CONCEALED && found < minNeeded );
+
+  return found;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Collects prediction blocks only from the current column: the block
+ *    directly above and the block "step" rows below are flagged with 1 when
+ *    their condition is better than ERC_BLOCK_CORRUPTED.
+ * \note
+ *    No range checks are done: the caller has to guarantee that both the
+ *    row above (currRow-1) and the row below (currRow+step) exist.
+ * \return
+ *    Number of usable neighbour Macroblocks for concealment.
+ * \param predBlocks[]
+ *    Array (8 entries) for indicating the valid neighbor blocks
+ * \param currRow
+ *    Current block row in the frame
+ * \param currColumn
+ *    Current block column in the frame
+ * \param condition
+ *    The block condition (ok, lost) table
+ * \param maxRow
+ *    Number of block rows in the frame (not used)
+ * \param maxColumn
+ *    Number of block columns in the frame
+ * \param step
+ *    Number of blocks belonging to a MB, when counting
+ *    in vertical/horizontal direction. (Y:2 U,V:1)
+ ************************************************************************
+ */
+int ercCollectColumnBlocks( int predBlocks[], int currRow, int currColumn, int *condition, int maxRow, int maxColumn, int step )
+{
+  const int above = (currRow-1)*maxColumn + currColumn;
+  const int below = (currRow+step)*maxColumn + currColumn;
+  int found = 0;
+
+  memset( predBlocks, 0, 8*sizeof(int) );
+
+  if ( condition[ above ] > ERC_BLOCK_CORRUPTED )
+  {
+    predBlocks[4] = 1;
+    found++;
+  }
+  if ( condition[ below ] > ERC_BLOCK_CORRUPTED )
+  {
+    predBlocks[6] = 1;
+    found++;
+  }
+
+  return found;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Conceals one MB (Y) or block (U,V) at (currRow, column) from the
+ *    neighbors flagged in predBlocks[] and marks the concealed entries in
+ *    the condition table as ERC_BLOCK_CONCEALED.
+ * \param p_Vid
+ *    image encoding parameters for current picture
+ * \param comp
+ *    color component (0:Y, 1:U, 2:V)
+ * \param recfr
+ *    Reconstructed frame buffer
+ * \param picSizeX
+ *    Width of the frame in pixels
+ * \param condition
+ *    The block condition (ok, lost) table
+ * \param currRow
+ *    Block row to conceal
+ * \param column
+ *    Block column to conceal
+ * \param lastColumn
+ *    Number of block columns in the frame
+ * \param predBlocks[]
+ *    Usable neighbors as collected by ercCollect8PredBlocks
+ ************************************************************************
+ */
+static void ercConcealOneIntraMB( VideoParameters *p_Vid, int comp, frame *recfr, int picSizeX, int *condition,
+                                  int currRow, int column, int lastColumn, int predBlocks[] )
+{
+  const int pos = currRow*lastColumn + column;
+
+  switch( comp )
+  {
+  case 0 :
+    ercPixConcealIMB( p_Vid, recfr->yptr, currRow, column, predBlocks, picSizeX, 2 );
+    break;
+  case 1 :
+    ercPixConcealIMB( p_Vid, recfr->uptr, currRow, column, predBlocks, (picSizeX>>1), 1 );
+    break;
+  case 2 :
+    ercPixConcealIMB( p_Vid, recfr->vptr, currRow, column, predBlocks, (picSizeX>>1), 1 );
+    break;
+  default :
+    break;
+  }
+
+  condition[ pos ] = ERC_BLOCK_CONCEALED;
+  if ( comp == 0 )
+  {
+    // a luma MB covers a 2x2 group of table entries
+    condition[ pos + 1 ]              = ERC_BLOCK_CONCEALED;
+    condition[ pos + lastColumn ]     = ERC_BLOCK_CONCEALED;
+    condition[ pos + lastColumn + 1 ] = ERC_BLOCK_CONCEALED;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Core for the Intra blocks concealment.
+ *    It is called for each color component (Y,U,V) separately
+ *    Finds the corrupted blocks and calls pixel interpolation functions
+ *    to correct them, one block at a time.
+ *    Scanning is done vertically, column by column. A run of corrupted
+ *    blocks that reaches the bottom of the frame is corrected from above,
+ *    a run that starts in the first row is corrected from below, and any
+ *    other run is corrected bi-directionally, i.e., first block, last
+ *    block, first block+1, last block -1 ...
+ * \note
+ *    The bi-directional alternation is derived from the iteration count
+ *    (i/step). Testing the parity of i itself never alternates for the Y
+ *    component, because i advances in steps of 2 there.
+ * \param p_Vid
+ *    image encoding parameters for current picture
+ * \param lastColumn
+ *    Number of block columns in the frame
+ * \param lastRow
+ *    Number of block rows in the frame
+ * \param comp
+ *    color component
+ * \param recfr
+ *    Reconstructed frame buffer
+ * \param picSizeX
+ *    Width of the frame in pixels
+ * \param condition
+ *    The block condition (ok, lost) table
+ ************************************************************************
+ */
+static void concealBlocks( VideoParameters *p_Vid, int lastColumn, int lastRow, int comp, frame *recfr, int picSizeX, int *condition )
+{
+  const int thr = ERC_BLOCK_CORRUPTED;
+  // in the Y component do the concealment MB-wise (not block-wise):
+  // this is useful if only whole MBs can be damaged or lost
+  const int step = (comp == 0) ? 2 : 1;
+  int row, column, currRow, i, areaHeight;
+  int firstCorruptedRow, lastCorruptedRow;
+  int predBlocks[8];
+
+  for ( column = 0; column < lastColumn; column += step )
+  {
+    for ( row = 0; row < lastRow; row += step )
+    {
+      if ( condition[row*lastColumn+column] > thr )
+        continue;
+
+      firstCorruptedRow = row;
+      // find the last row which has corrupted blocks (in same continuous area)
+      for ( lastCorruptedRow = row+step; lastCorruptedRow < lastRow; lastCorruptedRow += step )
+      {
+        if ( condition[ lastCorruptedRow*lastColumn + column ] > thr )
+        {
+          // current one is already OK, so the last was the previous one
+          lastCorruptedRow -= step;
+          break;
+        }
+      }
+
+      if ( lastCorruptedRow >= lastRow )
+      {
+        // the run reaches the bottom of the frame: correct only from above
+        for ( currRow = firstCorruptedRow; currRow < lastRow; currRow += step )
+        {
+          ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 );
+          ercConcealOneIntraMB( p_Vid, comp, recfr, picSizeX, condition, currRow, column, lastColumn, predBlocks );
+        }
+        row = lastRow;
+      }
+      else if ( firstCorruptedRow == 0 )
+      {
+        // the run starts in the first row: correct only from below
+        for ( currRow = lastCorruptedRow; currRow >= 0; currRow -= step )
+        {
+          ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 );
+          ercConcealOneIntraMB( p_Vid, comp, recfr, picSizeX, condition, currRow, column, lastColumn, predBlocks );
+        }
+        row = lastCorruptedRow+step;
+      }
+      else
+      {
+        // correct bi-directionally; continue scanning behind the run afterwards
+        row = lastCorruptedRow+step;
+        areaHeight = lastCorruptedRow-firstCorruptedRow+step;
+
+        // Conceal the corrupted area switching between the up and the bottom rows
+        for ( i = 0; i < areaHeight; i += step )
+        {
+          if ( (i / step) % 2 )
+          {
+            currRow = lastCorruptedRow;
+            lastCorruptedRow -= step;
+          }
+          else
+          {
+            currRow = firstCorruptedRow;
+            firstCorruptedRow += step;
+          }
+
+          ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 );
+          ercConcealOneIntraMB( p_Vid, comp, recfr, picSizeX, condition, currRow, column, lastColumn, predBlocks );
+        }
+      }
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Does the actual pixel based interpolation for block[]
+ *    using weighted average. Every pixel is the weighted mean of the
+ *    nearest border pixels of the available above/left/below/right
+ *    neighbors (src[4..7]); the weight of a neighbor grows as the pixel
+ *    gets closer to it. The corner neighbors src[0..3] are not used.
+ *    When no neighbor is available, the pixel is set to the DC prediction
+ *    value taken from p_Vid (entry 1 for blockSize 8, entry 0 otherwise).
+ * \note
+ *    The result is stored as imgpel, so that no bits are lost should the
+ *    pixel type be wider than 8 bits.
+ * \param p_Vid
+ *    image encoding parameters for current picture
+ * \param src[]
+ *    pointers to neighboring source blocks (NULL: neighbor not usable)
+ * \param block
+ *    destination block
+ * \param blockSize
+ *    16 for Y, 8 for U/V components
+ * \param frameWidth
+ *    Width of the frame in pixels
+ ************************************************************************
+ */
+static void pixMeanInterpolateBlock( VideoParameters *p_Vid, imgpel *src[], imgpel *block, int blockSize, int frameWidth )
+{
+  const int bmax = blockSize - 1;
+  int row, column, k = 0;
+  int weight, weightSum, sum;
+
+  for ( row = 0; row < blockSize; row++ )
+  {
+    for ( column = 0; column < blockSize; column++ )
+    {
+      sum = 0;
+      weightSum = 0;
+
+      // above: last pixel row of the upper neighbor
+      if ( src[4] != NULL )
+      {
+        weight = blockSize-row;
+        sum += weight * (*(src[4]+bmax*frameWidth+column));
+        weightSum += weight;
+      }
+      // left: last pixel column of the left neighbor
+      if ( src[5] != NULL )
+      {
+        weight = blockSize-column;
+        sum += weight * (*(src[5]+row*frameWidth+bmax));
+        weightSum += weight;
+      }
+      // below: first pixel row of the lower neighbor
+      if ( src[6] != NULL )
+      {
+        weight = row+1;
+        sum += weight * (*(src[6]+column));
+        weightSum += weight;
+      }
+      // right: first pixel column of the right neighbor
+      if ( src[7] != NULL )
+      {
+        weight = column+1;
+        sum += weight * (*(src[7]+row*frameWidth));
+        weightSum += weight;
+      }
+
+      if ( weightSum > 0 )
+        block[ k + column ] = (imgpel)(sum/weightSum);
+      else
+        block[ k + column ] = blockSize == 8 ? p_Vid->dc_pred_value_comp[1] : p_Vid->dc_pred_value_comp[0];
+    }
+    k += frameWidth;
+  }
+}
diff --git a/Src/h264dec/ldecod/src/erc_do_p.c b/Src/h264dec/ldecod/src/erc_do_p.c
new file mode 100644
index 00000000..69727d2b
--- /dev/null
+++ b/Src/h264dec/ldecod/src/erc_do_p.c
@@ -0,0 +1,1742 @@
+
+/*!
+ *************************************************************************************
+ * \file
+ * erc_do_p.c
+ *
+ * \brief
+ * Inter (P) frame error concealment algorithms for decoder
+ *
+ * \author
+ * - Viktor Varsa <viktor.varsa@nokia.com>
+ * - Ye-Kui Wang <wyk@ieee.org>
+ * - Jill Boyce <jill.boyce@thomson.net>
+ * - Saurav K Bandyopadhyay <saurav@ieee.org>
+ * - Zhenyu Wu <Zhenyu.Wu@thomson.net>
+ * - Purvin Pandit <Purvin.Pandit@thomson.net>
+ *
+ *************************************************************************************
+ */
+
+#include "global.h"
+#include "mbuffer.h"
+#include "memalloc.h"
+#include "erc_do.h"
+#include "image.h"
+#include "mc_prediction.h"
+#include "macroblock.h"
+
+
+// static function declarations
+// (forward declarations of file-local helpers, so they can be called before their definitions)
+static int concealByCopy(frame *recfr, int currMBNum, objectBuffer_t *object_list, int picSizeX);
+static int concealByTrial(frame *recfr, imgpel *predMB,
+ int currMBNum, objectBuffer_t *object_list, int predBlocks[],
+ int picSizeX, int picSizeY, int *yCondition);
+static int edgeDistortion (int predBlocks[], int currYBlockNum, imgpel *predMB,
+ imgpel *recY, int picSizeX, int regionSize);
+static void copyBetweenFrames (frame *recfr, int currYBlockNum, int picSizeX, int regionSize);
+static void buildPredRegionYUV(VideoParameters *p_Vid, const short *mv, int x, int y, imgpel *predMB);
+
+// picture error concealment
+static void buildPredblockRegionYUV(VideoParameters *p_Vid, const short *mv,
+ int x, int y, imgpel *predMB, int list);
+static void CopyImgData(imgpel **inputY, imgpel ***inputUV, imgpel **outputY, imgpel ***outputUV,
+ int img_width, int img_height, int img_width_cr, int img_height_cr);
+
+static void copyPredMB (int currYBlockNum, imgpel *predMB, frame *recfr,
+ int picSizeX, int regionSize);
+static void add_node ( VideoParameters *p_Vid, struct concealment_node *ptr );
+static void delete_node( VideoParameters *p_Vid, struct concealment_node *ptr );
+
+// Right-shift amounts that convert luma coordinates into chroma coordinates:
+// row 0 is applied to x, row 1 to y; the second index is the chroma_format_idc.
+static const int uv_div[2][4] = {{0, 1, 1, 0}, {0, 1, 0, 0}}; //[x/y][yuv_format]
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Conceals one MB of an Inter frame and marks all its components as
+ *    concealed. The usable neighbors are collected first; depending on
+ *    p_Vid->erc_mvperMB the MB is then concealed by motion vector trial
+ *    (>= MVPERMB_THR) or by copying from the reference frame.
+ * \param recfr
+ *    Reconstructed frame buffer
+ * \param predMB
+ *    memory area for storing temporary pixel values for a macroblock
+ * \param object_list
+ *    Motion info for all MBs in the frame
+ * \param errorVar
+ *    Variables for error concealment
+ * \param mbRow
+ *    MB row of the MB to conceal
+ * \param mbColumn
+ *    MB column of the MB to conceal
+ * \param mbRows
+ *    Number of MB rows in the frame
+ * \param mbColumns
+ *    Number of MB columns in the frame
+ * \param picSizeX
+ *    Width of the frame in pixels
+ * \param picSizeY
+ *    Height of the frame in pixels
+ ************************************************************************
+ */
+static void ercConcealOneInterMB( frame *recfr, imgpel *predMB, objectBuffer_t *object_list,
+                                  ercVariables_t *errorVar, int mbRow, int mbColumn,
+                                  int mbRows, int mbColumns, int picSizeX, int picSizeY )
+{
+  VideoParameters *p_Vid = recfr->p_Vid;
+  const int mbNum = mbRow*mbColumns + mbColumn;
+  int predBlocks[8];
+
+  // the Y condition table is organized in 8x8 blocks, hence the doubled coordinates
+  ercCollect8PredBlocks (predBlocks, (mbRow<<1), (mbColumn<<1),
+    errorVar->yCondition, (mbRows<<1), (mbColumns<<1), 2, 0);
+
+  if ( p_Vid->erc_mvperMB >= MVPERMB_THR )
+    concealByTrial(recfr, predMB, mbNum, object_list, predBlocks, picSizeX, picSizeY, errorVar->yCondition);
+  else
+    concealByCopy(recfr, mbNum, object_list, picSizeX);
+
+  ercMarkCurrMBConcealed (mbNum, -1, picSizeX, errorVar);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    The main function for Inter (P) frame concealment.
+ *    The MB columns are visited alternately from the left and the right
+ *    frame border towards the middle. Inside a column every continuous run
+ *    of corrupted MBs is concealed from above (run reaches the bottom),
+ *    from below (run starts in the first row) or bi-directionally.
+ * \return
+ *    0, if concealment is switched off (or errorVar is NULL) and simple
+ *       concealment should be used
+ *    1, otherwise (even if none of the blocks were concealed)
+ * \param recfr
+ *    Reconstructed frame buffer
+ * \param object_list
+ *    Motion info for all MBs in the frame
+ * \param picSizeX
+ *    Width of the frame in pixels
+ * \param picSizeY
+ *    Height of the frame in pixels
+ * \param errorVar
+ *    Variables for error concealment
+ * \param chroma_format_idc
+ *    Chroma format IDC
+ ************************************************************************
+ */
+int ercConcealInterFrame(frame *recfr, objectBuffer_t *object_list,
+                         int picSizeX, int picSizeY, ercVariables_t *errorVar, int chroma_format_idc )
+{
+  VideoParameters *p_Vid = recfr->p_Vid;
+  int lastColumn, lastRow;
+  int firstCorruptedRow, lastCorruptedRow;
+  int row, column, columnInd, currRow, areaHeight, i;
+  imgpel *predMB;
+
+  /* nothing to do when concealment is off */
+  if ( errorVar == NULL || !errorVar->concealment )
+    return 0;
+
+  /* nothing to do when no segment was reported as corrupted */
+  if ( !errorVar->nOfCorruptedSegments )
+    return 1;
+
+  /* scratch MB: 256 luma samples, followed by both chroma blocks unless monochrome */
+  if ( chroma_format_idc == YUV400 )
+    predMB = (imgpel *) malloc(256 * sizeof (imgpel));
+  else
+    predMB = (imgpel *) malloc ( (256 + (p_Vid->mb_cr_size_x * p_Vid->mb_cr_size_y)*2) * sizeof (imgpel));
+
+  if ( predMB == NULL ) no_mem_exit("ercConcealInterFrame: predMB");
+
+  lastRow    = (int) (picSizeY>>4);
+  lastColumn = (int) (picSizeX>>4);
+
+  for ( columnInd = 0; columnInd < lastColumn; columnInd++ )
+  {
+    /* even steps walk in from the left border, odd steps from the right border */
+    if ( columnInd % 2 )
+      column = lastColumn - columnInd/2 - 1;
+    else
+      column = columnInd/2;
+
+    for ( row = 0; row < lastRow; row++ )
+    {
+      /* skip MBs that are OK or already concealed */
+      if ( errorVar->yCondition[MBxy2YBlock(column, row, 0, picSizeX)] > ERC_BLOCK_CORRUPTED )
+        continue;
+
+      firstCorruptedRow = row;
+      /* find the last row which has corrupted blocks (in same continuous area) */
+      for ( lastCorruptedRow = row+1; lastCorruptedRow < lastRow; lastCorruptedRow++ )
+      {
+        if ( errorVar->yCondition[MBxy2YBlock(column, lastCorruptedRow, 0, picSizeX)] > ERC_BLOCK_CORRUPTED )
+        {
+          /* current one is already OK, so the last was the previous one */
+          lastCorruptedRow--;
+          break;
+        }
+      }
+
+      if ( lastCorruptedRow >= lastRow )
+      {
+        /* the run reaches the bottom of the frame: correct only from above */
+        for ( currRow = firstCorruptedRow; currRow < lastRow; currRow++ )
+          ercConcealOneInterMB( recfr, predMB, object_list, errorVar, currRow, column,
+                                lastRow, lastColumn, picSizeX, picSizeY );
+        row = lastRow;
+      }
+      else if ( firstCorruptedRow == 0 )
+      {
+        /* the run starts in the first row: correct only from below */
+        for ( currRow = lastCorruptedRow; currRow >= 0; currRow-- )
+          ercConcealOneInterMB( recfr, predMB, object_list, errorVar, currRow, column,
+                                lastRow, lastColumn, picSizeX, picSizeY );
+        row = lastCorruptedRow+1;
+      }
+      else
+      {
+        /* correct bi-directionally; continue scanning behind the run afterwards */
+        row = lastCorruptedRow+1;
+        areaHeight = lastCorruptedRow-firstCorruptedRow+1;
+
+        /* conceal the corrupted area switching between the up and the bottom rows */
+        for ( i = 0; i < areaHeight; i++ )
+        {
+          if ( i % 2 )
+            currRow = lastCorruptedRow--;
+          else
+            currRow = firstCorruptedRow++;
+
+          ercConcealOneInterMB( recfr, predMB, object_list, errorVar, currRow, column,
+                                lastRow, lastColumn, picSizeX, picSizeY );
+        }
+      }
+    }
+  }
+
+  free(predMB);
+  return 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    It conceals a given MB by simply copying the pixel area from the reference image
+ *    that is at the same location as the macroblock in the current image. This corresponds
+ *    to COPY MBs. The first region entry of the MB in object_list (index currMBNum*4)
+ *    is set to REGMODE_INTER_COPY together with the pixel position of the MB.
+ * \return
+ *    Always zero (0).
+ * \param recfr
+ *    Reconstructed frame buffer
+ * \param currMBNum
+ *    current MB index
+ * \param object_list
+ *    Motion info for all MBs in the frame
+ * \param picSizeX
+ *    Width of the frame in pixels
+ ************************************************************************
+ */
+static int concealByCopy(frame *recfr, int currMBNum,
+                         objectBuffer_t *object_list, int picSizeX)
+{
+  objectBuffer_t *region = object_list+(currMBNum<<2);
+
+  region->regionMode = REGMODE_INTER_COPY;
+
+  /* MB coordinates -> pixel coordinates */
+  region->xMin = (xPosMB(currMBNum,picSizeX)<<4);
+  region->yMin = (yPosMB(currMBNum,picSizeX)<<4);
+
+  copyBetweenFrames (recfr, MBNum2YBlock(currMBNum,0,picSizeX), picSizeX, 16);
+
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Copies the co-located pixel values from the reference picture
+ *    (p_Vid->listX[0][0]) to the current frame, for the luma region and for
+ *    the corresponding chroma regions. The chroma region is derived from
+ *    the luma region with the uv_div shifts of the current picture's chroma
+ *    format. Used by concealByCopy
+ * \param recfr
+ *    Reconstructed frame buffer
+ * \param currYBlockNum
+ *    index of the block (8x8) in the Y plane
+ * \param picSizeX
+ *    Width of the frame in pixels
+ * \param regionSize
+ *    can be 16 or 8 to tell the dimension of the region to copy
+ ************************************************************************
+ */
+static void copyBetweenFrames (frame *recfr, int currYBlockNum, int picSizeX, int regionSize)
+{
+  VideoParameters *p_Vid = recfr->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  StorablePicture *refPic = p_Vid->listX[0][0];
+  /* luma -> chroma coordinate shifts for the current chroma format */
+  const int shiftX = uv_div[0][dec_picture->chroma_format_idc];
+  const int shiftY = uv_div[1][dec_picture->chroma_format_idc];
+  /* top-left pixel of the region to be copied */
+  const int xmin = (xPosYBlock(currYBlockNum,picSizeX)<<3);
+  const int ymin = (yPosYBlock(currYBlockNum,picSizeX)<<3);
+  int j, k, location;
+
+  /* luma */
+  for (j = ymin; j < ymin + regionSize; j++)
+  {
+    for (k = xmin; k < xmin + regionSize; k++)
+    {
+      location = j * picSizeX + k;
+      recfr->yptr[location] = refPic->imgY->img[j][k];
+    }
+  }
+
+  /* chroma: both planes share the same geometry */
+  for (j = ymin >> shiftY; j < (ymin + regionSize) >> shiftY; j++)
+  {
+    for (k = xmin >> shiftX; k < (xmin + regionSize) >> shiftX; k++)
+    {
+      location = ((j * picSizeX) >> shiftX) + k;
+      recfr->uptr[location] = refPic->imgUV[0]->img[j][k];
+      recfr->vptr[location] = refPic->imgUV[1]->img[j][k];
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * It conceals a given MB by using the motion vectors of one reliable neighbor. That MV of a
+ * neighbor is selected which gives the lowest pixel difference at the edges of the MB
+ * (see function edgeDistortion). This corresponds to a spatial smoothness criterion.
+ * Zero motion (a plain copy from the reference) is always evaluated as a candidate too.
+ * The winning prediction is written into the picture through copyPredMB, the chosen
+ * region mode and MV are stored in object_list, and the block is marked
+ * ERC_BLOCK_CONCEALED in yCondition.
+ * \return
+ * Always zero (0).
+ * \param recfr
+ * Reconstructed frame buffer
+ * \param predMB
+ * memory area for storing temporary pixel values for a macroblock
+ * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320
+ * \param currMBNum
+ * current MB index
+ * \param object_list
+ * array of region structures storing region mode and mv for each region
+ * \param predBlocks
+ * status array of the neighboring blocks (if they are OK, concealed or lost);
+ * only entries 4..7 are read here
+ * \param picSizeX
+ * Width of the frame in pixels
+ * \param picSizeY
+ * Height of the frame in pixels (not used by this function)
+ * \param yCondition
+ * array for conditions of Y blocks from ercVariables_t
+ ************************************************************************
+ */
+static int concealByTrial(frame *recfr, imgpel *predMB,
+ int currMBNum, objectBuffer_t *object_list, int predBlocks[],
+ int picSizeX, int picSizeY, int *yCondition)
+{
+ VideoParameters *p_Vid = recfr->p_Vid;
+ /* numIntraNeighbours and bestDir are written below but never read in this function */
+ int predMBNum = 0, numMBPerLine,
+ compSplit1 = 0, compSplit2 = 0, compLeft = 1, comp = 0, compPred, order = 1,
+ fInterNeighborExists, numIntraNeighbours,
+ fZeroMotionChecked, predSplitted = 0,
+ threshold = ERC_BLOCK_OK,
+ minDist, currDist, i, k, bestDir;
+ int regionSize;
+ objectBuffer_t *currRegion;
+ short mvBest[3] = {0, 0, 0}, mvPred[3] = {0, 0, 0}, *mvptr;
+
+ numMBPerLine = (int) (picSizeX>>4);
+
+ /* buildPredRegionYUV reads p_Vid->current_mb_nr to locate the macroblock data */
+ p_Vid->current_mb_nr = currMBNum;
+
+ comp = 0;
+ regionSize = 16;
+
+ /* compLeft starts at 1, so this outer loop runs exactly once: the whole
+    macroblock is concealed as a single 16x16 region (comp == 0) */
+ do
+ { /* 4 blocks loop */
+
+ currRegion = object_list+(currMBNum<<2)+comp;
+
+ /* set the position of the region to be concealed */
+
+ currRegion->xMin = (xPosYBlock(MBNum2YBlock(currMBNum,comp,picSizeX),picSizeX)<<3);
+ currRegion->yMin = (yPosYBlock(MBNum2YBlock(currMBNum,comp,picSizeX),picSizeX)<<3);
+
+ /* Try the neighbours at the current reliability threshold; when none of them
+    supplied an inter candidate, lower the threshold and retry, down to
+    ERC_BLOCK_CONCEALED. */
+ do
+ { /* reliability loop */
+
+ minDist = 0;
+ fInterNeighborExists = 0;
+ numIntraNeighbours = 0;
+ fZeroMotionChecked = 0;
+
+ /* loop the 4 neighbours */
+ for (i = 4; i < 8; i++)
+ {
+
+ /* if reliable, try it */
+ if (predBlocks[i] >= threshold)
+ {
+ /* Map the neighbour index to its MB number and to the two 8x8 blocks of
+    that MB that are inspected for it. From the index arithmetic:
+    4 = MB above, 5 = MB to the left, 6 = MB below, 7 = MB to the right. */
+ switch (i)
+ {
+ case 4:
+ predMBNum = currMBNum-numMBPerLine;
+ compSplit1 = 2;
+ compSplit2 = 3;
+ break;
+
+ case 5:
+ predMBNum = currMBNum-1;
+ compSplit1 = 1;
+ compSplit2 = 3;
+ break;
+
+ case 6:
+ predMBNum = currMBNum+numMBPerLine;
+ compSplit1 = 0;
+ compSplit2 = 1;
+ break;
+
+ case 7:
+ predMBNum = currMBNum+1;
+ compSplit1 = 0;
+ compSplit2 = 2;
+ break;
+ }
+
+ /* try the concealment with the Motion Info of the current neighbour
+ only try if the neighbour is not Intra */
+ if (isBlock(object_list,predMBNum,compSplit1,INTRA) ||
+ isBlock(object_list,predMBNum,compSplit2,INTRA))
+ {
+ numIntraNeighbours++;
+ }
+ else
+ {
+ /* if neighbour MB is splitted, try both neighbour blocks */
+ /* The loop starts with compSplit1 and switches to compSplit2 for any
+    further pass; predSplitted counts the remaining passes.
+    NOTE(review): the pass count depends on the value isSplitted returns,
+    which is defined outside this view - confirm. */
+ for (predSplitted = isSplitted(object_list, predMBNum),
+ compPred = compSplit1;
+ predSplitted >= 0;
+ compPred = compSplit2,
+ predSplitted -= ((compSplit1 == compSplit2) ? 2 : 1))
+ {
+
+ /* if Zero Motion Block, do the copying. This option is tried only once */
+ if (isBlock(object_list, predMBNum, compPred, INTER_COPY))
+ {
+
+ if (fZeroMotionChecked)
+ {
+ continue;
+ }
+ else
+ {
+ fZeroMotionChecked = 1;
+
+ mvPred[0] = mvPred[1] = 0;
+ mvPred[2] = 0;
+
+ buildPredRegionYUV(p_Vid->erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB);
+ }
+ }
+ /* build motion using the neighbour's Motion Parameters */
+ else if (isBlock(object_list,predMBNum,compPred,INTRA))
+ {
+ continue;
+ }
+ else
+ {
+ mvptr = getParam(object_list, predMBNum, compPred, mv);
+
+ /* mv[0], mv[1]: motion vector; mv[2] is used as the reference index by buildPredRegionYUV */
+ mvPred[0] = mvptr[0];
+ mvPred[1] = mvptr[1];
+ mvPred[2] = mvptr[2];
+
+ buildPredRegionYUV(p_Vid->erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB);
+ }
+
+ /* measure absolute boundary pixel difference */
+ currDist = edgeDistortion(predBlocks,
+ MBNum2YBlock(currMBNum,comp,picSizeX),
+ predMB, recfr->yptr, picSizeX, regionSize);
+
+ /* if so far best -> store the pixels as the best concealment */
+ /* the first candidate of a pass is always accepted (!fInterNeighborExists) */
+ if (currDist < minDist || !fInterNeighborExists)
+ {
+
+ minDist = currDist;
+ bestDir = i;
+
+ for (k=0;k<3;k++)
+ mvBest[k] = mvPred[k];
+
+ currRegion->regionMode =
+ (isBlock(object_list, predMBNum, compPred, INTER_COPY)) ?
+ ((regionSize == 16) ? REGMODE_INTER_COPY : REGMODE_INTER_COPY_8x8) :
+ ((regionSize == 16) ? REGMODE_INTER_PRED : REGMODE_INTER_PRED_8x8);
+
+ copyPredMB(MBNum2YBlock(currMBNum,comp,picSizeX), predMB, recfr,
+ picSizeX, regionSize);
+ }
+
+ fInterNeighborExists = 1;
+ }
+ }
+ }
+ }
+
+ threshold--;
+
+ } while ((threshold >= ERC_BLOCK_CONCEALED) && (fInterNeighborExists == 0));
+
+ /* always try zero motion */
+ /* (skipped when a zero-motion neighbour was already evaluated in the last pass) */
+ if (!fZeroMotionChecked)
+ {
+ mvPred[0] = mvPred[1] = 0;
+ mvPred[2] = 0;
+
+ buildPredRegionYUV(p_Vid->erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB);
+
+ currDist = edgeDistortion(predBlocks,
+ MBNum2YBlock(currMBNum,comp,picSizeX),
+ predMB, recfr->yptr, picSizeX, regionSize);
+
+ if (currDist < minDist || !fInterNeighborExists)
+ {
+
+ minDist = currDist;
+ for (k=0;k<3;k++)
+ mvBest[k] = mvPred[k];
+
+ currRegion->regionMode =
+ ((regionSize == 16) ? REGMODE_INTER_COPY : REGMODE_INTER_COPY_8x8);
+
+ copyPredMB(MBNum2YBlock(currMBNum,comp,picSizeX), predMB, recfr,
+ picSizeX, regionSize);
+ }
+ }
+
+ /* remember the winning motion data for this region */
+ for (i=0; i<3; i++)
+ currRegion->mv[i] = mvBest[i];
+
+ yCondition[MBNum2YBlock(currMBNum,comp,picSizeX)] = ERC_BLOCK_CONCEALED;
+ comp = (comp+order+4)%4;
+ compLeft--;
+
+ } while (compLeft);
+
+ return 0;
+}
+
+/*!
+************************************************************************
+* \brief
+* Builds the motion prediction pixels from the given location (in 1/4 pixel units)
+* of the reference frame. It not only copies the pixel values but builds the interpolation
+* when the pixel positions to be copied from is not full pixel (any 1/4 pixel position).
+* It copies the resulting pixel values into predMB.
+* As side effects it overwrites the current-macroblock coordinates stored in p_Vid
+* (mb_x, mb_y, block_x, block_y, pix_c_x, pix_c_y) and the mb_pred buffers of the
+* slice that owns macroblock p_Vid->current_mb_nr.
+* \param p_Vid
+* The pointer of img_par structure of current frame
+* \param mv
+* The pointer of the predicted MV of the current (being concealed) MB;
+* mv[0], mv[1] are the vector components and mv[2] selects the list-0 reference picture
+* \param x
+* The x-coordinate of the above-left corner pixel of the current MB
+* \param y
+* The y-coordinate of the above-left corner pixel of the current MB
+* \param predMB
+* memory area for storing temporary pixel values for a macroblock
+* the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320
+************************************************************************
+*/
+static void buildPredRegionYUV(VideoParameters *p_Vid, const short *mv, int x, int y, imgpel *predMB)
+{
+ int i=0, j=0, ii=0, jj=0,i1=0,j1=0,j4=0,i4=0;
+ int jf=0;
+ int uv;
+ int vec1_x=0,vec1_y=0;
+ int ioff,joff;
+ imgpel *pMB = predMB;
+
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ int ii0,jj0,ii1,jj1,if1,jf1,if0,jf0;
+ int mv_mul;
+
+ //FRExt
+ int f1_x, f1_y, f2_x, f2_y, f3, f4, ifx;
+ int b8, b4;
+ // index into the subblk_offset tables; evaluates to -1 for chroma_format_idc 0, but is only used inside the != YUV400 branch
+ int yuv = dec_picture->chroma_format_idc - 1;
+
+ int ref_frame = imax (mv[2], 0); // !!KS: quick fix, we sometimes seem to get negative ref_pic here, so restrict to zero and above
+ int mb_nr = p_Vid->current_mb_nr;
+
+ Macroblock *currMB = &p_Vid->mb_data[mb_nr]; // intialization code deleted, see below, StW
+ Slice *currSlice = currMB->p_Slice;
+
+ h264_imgpel_macroblock_t tmp_block;
+
+ /* Update coordinates of the current concealed macroblock */
+ p_Vid->mb_x = x/MB_BLOCK_SIZE;
+ p_Vid->mb_y = y/MB_BLOCK_SIZE;
+ p_Vid->block_y = p_Vid->mb_y * BLOCK_SIZE;
+ p_Vid->pix_c_y = p_Vid->mb_y * p_Vid->mb_cr_size_y;
+ p_Vid->block_x = p_Vid->mb_x * BLOCK_SIZE;
+ p_Vid->pix_c_x = p_Vid->mb_x * p_Vid->mb_cr_size_x;
+
+ mv_mul=4;
+
+ // luma *******************************************************
+ // The macroblock is predicted one 4x4 block at a time, every block with the same mv.
+
+ for(j=0;j<MB_BLOCK_SIZE/BLOCK_SIZE;j++)
+ {
+ joff=j*4;
+ j4=p_Vid->block_y+j;
+ for(i=0;i<MB_BLOCK_SIZE/BLOCK_SIZE;i++)
+ {
+ ioff=i*4;
+ i4=p_Vid->block_x+i;
+
+ // vec1_x / vec1_y are computed but not read anywhere in this function
+ vec1_x = i4*4*mv_mul + mv[0];
+ vec1_y = j4*4*mv_mul + mv[1];
+
+ get_block_luma(currMB, PLANE_Y, p_Vid->listX[0][ref_frame], i4, j4, mv, BLOCK_SIZE, BLOCK_SIZE, tmp_block);
+
+ // store the 4x4 result at its offset inside the 16x16 luma prediction
+ for(ii=0;ii<BLOCK_SIZE;ii++)
+ for(jj=0;jj<MB_BLOCK_SIZE/BLOCK_SIZE;jj++)
+ currSlice->mb_pred[LumaComp][jj+joff][ii+ioff]=tmp_block[jj][ii];
+ }
+ }
+
+
+ // flatten the 16x16 luma prediction into predMB (row stride 16)
+ for (j = 0; j < 16; j++)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ pMB[j*16+i] = currSlice->mb_pred[LumaComp][j][i];
+ }
+ }
+ pMB += 256;
+
+ if (dec_picture->chroma_format_idc != YUV400)
+ {
+ // chroma *******************************************************
+ // f1 = number of fractional positions per chroma sample, f2 = matching bit mask,
+ // f3 / f4 = divisor and rounding offset of the bilinear interpolation below
+ f1_x = 64/p_Vid->mb_cr_size_x;
+ f2_x=f1_x-1;
+
+ f1_y = 64/p_Vid->mb_cr_size_y;
+ f2_y=f1_y-1;
+
+ f3=f1_x*f1_y;
+ f4=f3>>1;
+
+ for(uv=0;uv<2;uv++)
+ {
+ for (b8=0;b8<(p_Vid->num_uv_blocks);b8++)
+ {
+ for(b4=0;b4<4;b4++)
+ {
+ joff = subblk_offset_y[yuv][b8][b4];
+ j4=p_Vid->pix_c_y+joff;
+ ioff = subblk_offset_x[yuv][b8][b4];
+ i4=p_Vid->pix_c_x+ioff;
+
+ for(jj=0;jj<4;jj++)
+ {
+ // jf (and ifx below) are computed but not read anywhere in this function
+ jf=(j4+jj)/(p_Vid->mb_cr_size_y/4); // jf = Subblock_y-coordinate
+ for(ii=0;ii<4;ii++)
+ {
+ ifx=(i4+ii)/(p_Vid->mb_cr_size_x/4); // ifx = Subblock_x-coordinate
+
+ // sample position in fractional chroma units after applying the motion vector
+ i1=(i4+ii)*f1_x + mv[0];
+ j1=(j4+jj)*f1_y + mv[1];
+
+ // the four surrounding integer positions, clipped to the chroma plane
+ ii0=iClip3 (0, dec_picture->size_x_cr-1, i1/f1_x);
+ jj0=iClip3 (0, dec_picture->size_y_cr-1, j1/f1_y);
+ ii1=iClip3 (0, dec_picture->size_x_cr-1, ((i1+f2_x)/f1_x));
+ jj1=iClip3 (0, dec_picture->size_y_cr-1, ((j1+f2_y)/f1_y));
+
+ // bilinear weights derived from the fractional part
+ if1=(i1 & f2_x);
+ jf1=(j1 & f2_y);
+ if0=f1_x-if1;
+ jf0=f1_y-jf1;
+
+ currSlice->mb_pred[uv + 1][jj+joff][ii+ioff]=(if0*jf0*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj0][ii0]+
+ if1*jf0*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj0][ii1]+
+ if0*jf1*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj1][ii0]+
+ if1*jf1*p_Vid->listX[0][ref_frame]->imgUV[uv]->img[jj1][ii1]+f4)/f3;
+ }
+ }
+ }
+ }
+
+ // Only an 8x8 block per chroma plane is flattened into predMB (row stride 8).
+ // NOTE(review): this matches the documented u = predMB+256, v = predMB+320 layout
+ // and presumably assumes 4:2:0 chroma - confirm for other chroma formats.
+ for (j = 0; j < 8; j++)
+ {
+ for (i = 0; i < 8; i++)
+ {
+ pMB[j*8+i] = currSlice->mb_pred[uv + 1][j][i];
+ }
+ }
+ pMB += 64;
+
+ }
+ }
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the predicted pixels held in the temporary storage predMB into the picture
+ *    that is currently being decoded (p_Vid->dec_picture), at the position of the given
+ *    8x8 luma block. Luma is read from predMB with a row stride of 16; chroma U starts
+ *    at predMB+256 with a row stride of p_Vid->mb_cr_size_x and V follows 64 entries
+ *    later. Chroma is skipped for YUV400 pictures.
+ * \param currYBlockNum
+ *    index of the block (8x8) in the Y plane
+ * \param predMB
+ *    memory area where the temporary pixel values are stored
+ *    the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320
+ * \param recfr
+ *    pointer to a YUV frame; only used here to reach p_Vid
+ * \param picSizeX
+ *    picture width in pixels
+ * \param regionSize
+ *    can be 16 or 8 to tell the dimension of the region to copy
+ ************************************************************************
+ */
+static void copyPredMB (int currYBlockNum, imgpel *predMB, frame *recfr,
+                        int picSizeX, int regionSize)
+{
+  VideoParameters *p_Vid = recfr->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  int j, k, xmin, ymin, xmax, ymax;
+  int locationTmp;
+  int uv_x = uv_div[0][dec_picture->chroma_format_idc];
+  int uv_y = uv_div[1][dec_picture->chroma_format_idc];
+
+  /* inclusive pixel bounds of the region in the luma plane */
+  xmin = (xPosYBlock(currYBlockNum,picSizeX)<<3);
+  ymin = (yPosYBlock(currYBlockNum,picSizeX)<<3);
+  xmax = xmin + regionSize -1;
+  ymax = ymin + regionSize -1;
+
+  for (j = ymin; j <= ymax; j++)
+  {
+    for (k = xmin; k <= xmax; k++)
+    {
+      locationTmp = (j-ymin) * 16 + (k-xmin);
+      dec_picture->imgY->img[j][k] = predMB[locationTmp];
+    }
+  }
+
+  if (dec_picture->chroma_format_idc != YUV400)
+  {
+    for (j = (ymin>>uv_y); j <= (ymax>>uv_y); j++)
+    {
+      for (k = (xmin>>uv_x); k <= (xmax>>uv_x); k++)
+      {
+        /* The column offset must use the same horizontal shift as the loop start;
+           a fixed >>1 would index past the region whenever uv_x is 0. */
+        locationTmp = (j-(ymin>>uv_y)) * p_Vid->mb_cr_size_x + (k-(xmin>>uv_x)) + 256;
+        dec_picture->imgUV[0]->img[j][k] = predMB[locationTmp];
+
+        /* the V plane is stored 64 entries after the U plane */
+        locationTmp += 64;
+
+        dec_picture->imgUV[1]->img[j][k] = predMB[locationTmp];
+      }
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Measures how well the candidate block in predMB would fit into the luma plane recY
+ *    at block position currYBlockNum. For every usable neighbor (above, left, below,
+ *    right) the absolute differences between the candidate's border pixels and the
+ *    adjacent pixels of the frame are accumulated. Neighbors with status ERC_BLOCK_OK are
+ *    tried first; if there are none, the required status is lowered step by step down to
+ *    ERC_BLOCK_CONCEALED.
+ * \return
+ *    The accumulated difference divided by the number of neighbors used, or 0 when
+ *    no neighbor could be used at all.
+ * \param predBlocks
+ *    status array of the neighboring blocks (if they are OK, concealed or lost);
+ *    entries 4..7 are read
+ * \param currYBlockNum
+ *    index of the block (8x8) in the Y plane
+ * \param predMB
+ *    memory area where the temporary pixel values are stored (luma row stride 16)
+ *    the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320
+ * \param recY
+ *    pointer to a Y plane of a YUV frame
+ * \param picSizeX
+ *    picture width in pixels
+ * \param regionSize
+ *    can be 16 or 8 to tell the dimension of the region to copy
+ ************************************************************************
+ */
+static int edgeDistortion (int predBlocks[], int currYBlockNum, imgpel *predMB,
+                           imgpel *recY, int picSizeX, int regionSize)
+{
+  int level = ERC_BLOCK_OK;
+  int sum, used, side, n;
+  int predStart, predStep, frameStep;
+  imgpel *origin = NULL, *outside = NULL;
+
+  /* top-left pixel of the block inside the frame */
+  origin = recY + (yPosYBlock(currYBlockNum,picSizeX)<<3)*picSizeX + (xPosYBlock(currYBlockNum,picSizeX)<<3);
+
+  for (;;)
+  {
+    sum = 0;
+    used = 0;
+
+    for (side = 4; side < 8; side++)
+    {
+      /* skip neighbors that are not reliable enough at this level */
+      if (predBlocks[side] < level)
+        continue;
+
+      if (side == 4)
+      {
+        /* row above the block against the candidate's first row */
+        outside   = origin - picSizeX;
+        frameStep = 1;
+        predStart = 0;
+        predStep  = 1;
+      }
+      else if (side == 5)
+      {
+        /* column left of the block against the candidate's first column */
+        outside   = origin - 1;
+        frameStep = picSizeX;
+        predStart = 0;
+        predStep  = 16;
+      }
+      else if (side == 6)
+      {
+        /* row below the block against the candidate's last row */
+        outside   = origin + regionSize*picSizeX;
+        frameStep = 1;
+        predStart = (regionSize-1)*16;
+        predStep  = 1;
+      }
+      else
+      {
+        /* column right of the block against the candidate's last column */
+        outside   = origin + regionSize;
+        frameStep = picSizeX;
+        predStart = regionSize-1;
+        predStep  = 16;
+      }
+
+      for (n = 0; n < regionSize; n++)
+      {
+        sum += iabs((int)(predMB[predStart + n*predStep] - outside[n*frameStep]));
+      }
+
+      used++;
+    }
+
+    /* stop once the lowest level was tried or at least one neighbor contributed */
+    level--;
+    if (level < ERC_BLOCK_CONCEALED || used != 0)
+      break;
+  }
+
+  if (used == 0)
+  {
+    /* no usable neighbor: report zero distortion instead of failing */
+    return 0;
+  }
+  return (sum/used);
+}
+
+// picture error concealment below
+
+/*!
+************************************************************************
+* \brief
+* The motion prediction pixels are calculated from the given location (in
+* 1/4 pixel units) of the referenced frame. It copies the sub block from the
+* corresponding reference to the frame to be concealed.
+* The result written to predMB is a 4x4 luma block (16 values) followed, unless the
+* picture is YUV400, by a 2x2 block for each of the two chroma planes (4 values each).
+* As side effects it overwrites the coordinate fields of p_Vid (mb_x, mb_y, block_x,
+* block_y, pix_c_x, pix_c_y) and the mb_pred buffers of the slice that owns
+* macroblock p_Vid->current_mb_nr.
+* \param p_Vid
+* video parameters of the current picture
+* \param mv
+* mv[0], mv[1]: motion vector; mv[2]: index of the reference picture in the list
+* \param x
+* x-coordinate handed to get_block_luma and used to derive the block position
+* \param y
+* y-coordinate handed to get_block_luma and used to derive the block position
+* \param predMB
+* output area for the predicted pixel values
+* \param list
+* reference picture list (first index of p_Vid->listX) to predict from
+*
+*************************************************************************
+*/
+static void buildPredblockRegionYUV(VideoParameters *p_Vid, const short *mv,
+ int x, int y, imgpel *predMB, int list)
+{
+ int i=0,j=0,ii=0,jj=0,i1=0,j1=0,j4=0,i4=0;
+ int jf=0;
+ int uv;
+ int vec1_x=0,vec1_y=0;
+ int ioff,joff;
+
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ imgpel *pMB = predMB;
+
+ int ii0,jj0,ii1,jj1,if1,jf1,if0,jf0;
+ int mv_mul;
+
+ //FRExt
+ int f1_x, f1_y, f2_x, f2_y, f3, f4, ifx;
+ int yuv = dec_picture->chroma_format_idc - 1;
+
+ // NOTE(review): mv[2] is used as the list index without clamping, whereas
+ // buildPredRegionYUV restricts it with imax(mv[2], 0) - confirm it cannot be negative here.
+ int ref_frame = mv[2];
+ int mb_nr = p_Vid->current_mb_nr;
+
+ Macroblock *currMB = &p_Vid->mb_data[mb_nr]; // intialization code deleted, see below, StW
+ Slice *currSlice = currMB->p_Slice;
+
+ h264_imgpel_macroblock_t tmp_block;
+
+ /* Update coordinates of the current concealed macroblock */
+ /* here x and y are divided by BLOCK_SIZE (not MB_BLOCK_SIZE), and the chroma
+    position is scaled by mb_cr_size/4 accordingly */
+
+ p_Vid->mb_x = x/BLOCK_SIZE;
+ p_Vid->mb_y = y/BLOCK_SIZE;
+ p_Vid->block_y = p_Vid->mb_y * BLOCK_SIZE;
+ p_Vid->pix_c_y = p_Vid->mb_y * p_Vid->mb_cr_size_y/4;
+ p_Vid->block_x = p_Vid->mb_x * BLOCK_SIZE;
+ p_Vid->pix_c_x = p_Vid->mb_x * p_Vid->mb_cr_size_x/4;
+
+ mv_mul=4;
+
+ // luma *******************************************************
+
+ // vec1_x / vec1_y are computed but not read anywhere in this function
+ vec1_x = x*mv_mul + mv[0];
+ vec1_y = y*mv_mul + mv[1];
+ get_block_luma(currMB, PLANE_Y, p_Vid->listX[list][ref_frame], x,y, mv, BLOCK_SIZE, BLOCK_SIZE, tmp_block);
+
+ for(jj=0;jj<MB_BLOCK_SIZE/BLOCK_SIZE;jj++)
+ for(ii=0;ii<BLOCK_SIZE;ii++)
+ currSlice->mb_pred[LumaComp][jj][ii]=tmp_block[jj][ii];
+
+
+ // flatten the 4x4 luma prediction into predMB (row stride 4)
+ for (j = 0; j < 4; j++)
+ {
+ for (i = 0; i < 4; i++)
+ {
+ pMB[j*4+i] = currSlice->mb_pred[LumaComp][j][i];
+ }
+ }
+ pMB += 16;
+
+ if (dec_picture->chroma_format_idc != YUV400)
+ {
+ // chroma *******************************************************
+ // f1 = number of fractional positions per chroma sample, f2 = matching bit mask,
+ // f3 / f4 = divisor and rounding offset of the bilinear interpolation below
+ f1_x = 64/(p_Vid->mb_cr_size_x);
+ f2_x=f1_x-1;
+
+ f1_y = 64/(p_Vid->mb_cr_size_y);
+ f2_y=f1_y-1;
+
+ f3=f1_x*f1_y;
+ f4=f3>>1;
+
+ for(uv=0;uv<2;uv++)
+ {
+ joff = subblk_offset_y[yuv][0][0];
+ j4=p_Vid->pix_c_y+joff;
+ ioff = subblk_offset_x[yuv][0][0];
+ i4=p_Vid->pix_c_x+ioff;
+
+ // a 2x2 chroma block is predicted per plane
+ for(jj=0;jj<2;jj++)
+ {
+ // jf (and ifx below) are computed but not read anywhere in this function
+ jf=(j4+jj)/(p_Vid->mb_cr_size_y/4); // jf = Subblock_y-coordinate
+ for(ii=0;ii<2;ii++)
+ {
+ ifx=(i4+ii)/(p_Vid->mb_cr_size_x/4); // ifx = Subblock_x-coordinate
+
+ // sample position in fractional chroma units after applying the motion vector
+ i1=(i4+ii)*f1_x + mv[0];
+ j1=(j4+jj)*f1_y + mv[1];
+
+ // the four surrounding integer positions, clipped to the chroma plane
+ ii0=iClip3 (0, dec_picture->size_x_cr-1, i1/f1_x);
+ jj0=iClip3 (0, dec_picture->size_y_cr-1, j1/f1_y);
+ ii1=iClip3 (0, dec_picture->size_x_cr-1, ((i1+f2_x)/f1_x));
+ jj1=iClip3 (0, dec_picture->size_y_cr-1, ((j1+f2_y)/f1_y));
+
+ // bilinear weights derived from the fractional part
+ if1=(i1 & f2_x);
+ jf1=(j1 & f2_y);
+ if0=f1_x-if1;
+ jf0=f1_y-jf1;
+
+ currSlice->mb_pred[uv + 1][jj][ii]=(if0*jf0*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj0][ii0]+
+ if1*jf0*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj0][ii1]+
+ if0*jf1*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj1][ii0]+
+ if1*jf1*p_Vid->listX[list][ref_frame]->imgUV[uv]->img[jj1][ii1]+f4)/f3;
+ }
+ }
+
+ // flatten the 2x2 chroma prediction into predMB (row stride 2)
+ for (j = 0; j < 2; j++)
+ {
+ for (i = 0; i < 2; i++)
+ {
+ pMB[j*2+i] = currSlice->mb_pred[uv + 1][j][i];
+ }
+ }
+ pMB += 4;
+
+ }
+ }
+}
+
+/*!
+************************************************************************
+* \brief
+*    qsort comparator for arrays of StorablePicture pointers:
+*    orders the pictures by pic_num, largest first.
+* \return
+*    1 if the first picture has the smaller pic_num, -1 if it has the
+*    larger one, 0 if both are equal
+************************************************************************
+*/
+static inline int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 )
+{
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+  const int lhs = first->pic_num;
+  const int rhs = second->pic_num;
+
+  /* inverted three-way comparison: the larger value sorts first */
+  return (lhs < rhs) - (lhs > rhs);
+}
+
+/*!
+************************************************************************
+* \brief
+*    qsort comparator for arrays of StorablePicture pointers:
+*    orders the pictures by long_term_pic_num, smallest first.
+* \return
+*    -1 if the first picture has the smaller long_term_pic_num, 1 if it
+*    has the larger one, 0 if both are equal
+************************************************************************
+*/
+static inline int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 )
+{
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+  const int lhs = first->long_term_pic_num;
+  const int rhs = second->long_term_pic_num;
+
+  /* plain three-way comparison: the smaller value sorts first */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/*!
+************************************************************************
+* \brief
+*    qsort comparator for arrays of StorablePicture pointers:
+*    orders the pictures by poc, smallest first.
+* \return
+*    -1 if the first picture has the smaller poc, 1 if it has the
+*    larger one, 0 if both are equal
+************************************************************************
+*/
+static inline int compare_pic_by_poc_asc( const void *arg1, const void *arg2 )
+{
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+  const int lhs = first->poc;
+  const int rhs = second->poc;
+
+  /* plain three-way comparison: the smaller value sorts first */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    qsort comparator for arrays of StorablePicture pointers:
+*    orders the pictures by poc, largest first.
+* \return
+*    1 if the first picture has the smaller poc, -1 if it has the
+*    larger one, 0 if both are equal
+************************************************************************
+*/
+static inline int compare_pic_by_poc_desc( const void *arg1, const void *arg2 )
+{
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+  const int lhs = first->poc;
+  const int rhs = second->poc;
+
+  /* inverted three-way comparison: the larger value sorts first */
+  return (lhs < rhs) - (lhs > rhs);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Copies a picture sample by sample: the luma plane from inputY to
+*    outputY and both chroma planes from inputUV[0..1] to outputUV[0..1].
+* \param inputY
+*    source luma plane, addressed as [row][column]
+* \param inputUV
+*    source chroma planes, addressed as [plane][row][column]
+* \param outputY
+*    destination luma plane
+* \param outputUV
+*    destination chroma planes
+* \param img_width
+*    number of luma columns to copy
+* \param img_height
+*    number of luma rows to copy
+* \param img_width_cr
+*    number of chroma columns to copy
+* \param img_height_cr
+*    number of chroma rows to copy
+************************************************************************
+*/
+
+static void CopyImgData(imgpel **inputY, imgpel ***inputUV, imgpel **outputY, imgpel ***outputUV,
+                        int img_width, int img_height, int img_width_cr, int img_height_cr)
+{
+  int row, col;
+
+  /* luma */
+  for (row = 0; row < img_height; ++row)
+  {
+    for (col = 0; col < img_width; ++col)
+    {
+      outputY[row][col] = inputY[row][col];
+    }
+  }
+
+  /* chroma: U and V share the same dimensions and are copied side by side */
+  for (row = 0; row < img_height_cr; ++row)
+  {
+    for (col = 0; col < img_width_cr; ++col)
+    {
+      outputUV[0][row][col] = inputUV[0][row][col];
+      outputUV[1][row][col] = inputUV[1][row][col];
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Searches the decoded picture buffer from its last used entry
+*    backwards for a frame store with is_used == 3 whose frame is marked
+*    as used for reference and is not a long-term picture.
+* \return
+*    the frame of the first matching entry, or NULL if there is none
+************************************************************************
+*/
+
+static StorablePicture* get_last_ref_pic_from_dpb(DecodedPictureBuffer *p_Dpb)
+{
+  StorablePicture *candidate;
+  int idx = p_Dpb->used_size - 1;
+
+  while (idx >= 0)
+  {
+    if (p_Dpb->fs[idx]->is_used == 3)
+    {
+      candidate = p_Dpb->fs[idx]->frame;
+
+      /* only short-term reference frames qualify */
+      if (candidate->used_for_reference && !candidate->is_long_term)
+      {
+        return candidate;
+      }
+    }
+    idx--;
+  }
+
+  return NULL;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Prepares the header fields of a concealment picture from a source
+*    picture. No pixel data is copied by this function.
+*    Besides filling dst it also resets p_Vid->current_mb_nr to 0 and
+*    modifies src: src->slice_type is set to p_Vid->conceal_slice_type and
+*    src->adaptive_ref_pic_buffering_flag is cleared.
+* \param src
+*    picture the header fields are taken from
+* \param dst
+*    concealment picture to be initialised
+* \param p_Vid
+*    video parameters; conceal_slice_type is read, current_mb_nr is written
+*
+************************************************************************
+*/
+
+static void copy_to_conceal(StorablePicture *src, StorablePicture *dst, VideoParameters *p_Vid)
+{
+  /* NOTE(review): the previous version ended with an assignment of src to a
+     local copy of p_Vid->dec_picture, which had no effect and was removed
+     together with the unused locals. If updating p_Vid->dec_picture was
+     intended, confirm against the callers before adding it. */
+
+  p_Vid->current_mb_nr = 0;
+
+  dst->PicSizeInMbs = src->PicSizeInMbs;
+
+  dst->slice_type = src->slice_type = p_Vid->conceal_slice_type;
+
+  dst->idr_flag = FALSE; //since we do not want to clear the ref list
+
+  dst->no_output_of_prior_pics_flag = src->no_output_of_prior_pics_flag;
+  dst->long_term_reference_flag = src->long_term_reference_flag;
+  dst->adaptive_ref_pic_buffering_flag = src->adaptive_ref_pic_buffering_flag = 0;
+  dst->chroma_format_idc = src->chroma_format_idc;
+  dst->frame_mbs_only_flag = src->frame_mbs_only_flag;
+  dst->frame_cropping_flag = src->frame_cropping_flag;
+  dst->frame_cropping_rect_left_offset = src->frame_cropping_rect_left_offset;
+  dst->frame_cropping_rect_right_offset = src->frame_cropping_rect_right_offset;
+  dst->frame_cropping_rect_bottom_offset = src->frame_cropping_rect_bottom_offset;
+  dst->frame_cropping_rect_top_offset = src->frame_cropping_rect_top_offset;
+  dst->qp = src->qp;
+  dst->slice_qp_delta = src->slice_qp_delta;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Initialises a concealment picture from the most recent short-term
+*    reference frame found in the decoded picture buffer. The concealment
+*    slice type is set to P_SLICE before the header fields are copied.
+* \param picture
+*    concealment picture to be initialised
+* \param p_Vid
+*    video parameters giving access to the decoded picture buffer
+*
+************************************************************************
+*/
+
+static void
+copy_prev_pic_to_concealed_pic(StorablePicture *picture, VideoParameters *p_Vid)
+{
+  /* the last reference picture in the dpb serves as the source */
+  StorablePicture *ref_pic = get_last_ref_pic_from_dpb(p_Vid->p_Dpb);
+
+  assert(ref_pic != NULL);
+
+  /* take over its header fields as a P slice */
+  p_Vid->conceal_slice_type = P_SLICE;
+  copy_to_conceal(ref_pic, picture, p_Vid);
+}
+
+
+/*!
+************************************************************************
+* \brief
+* This function conceals a missing reference frame. The routine is called
+* based on the difference in frame number. It conceals an IDR frame loss
+* based on the sudden decrease in frame number.
+* For every missing frame_num value one concealment picture is allocated,
+* initialised from the last reference picture in the DPB and stored in the DPB.
+* p_Vid->frame_num and p_Vid->delta_pic_order_cnt[] are restored before returning;
+* pre_frame_num, last_ref_pic_poc and ref_flag[] are updated.
+*
+************************************************************************
+*/
+// TODO: benski> pass timecode
+void conceal_lost_frames(VideoParameters *p_Vid)
+{
+ int CurrFrameNum;
+ int UnusedShortTermFrameNum;
+ StorablePicture *picture = NULL;
+ // saved so that both values can be restored at the end
+ int tmp1 = p_Vid->delta_pic_order_cnt[0];
+ int tmp2 = p_Vid->delta_pic_order_cnt[1];
+ int i;
+
+ p_Vid->delta_pic_order_cnt[0] = p_Vid->delta_pic_order_cnt[1] = 0;
+
+ // printf("A gap in frame number is found, try to fill it.\n");
+
+ if(p_Vid->IDR_concealment_flag == 1)
+ {
+ // Conceals an IDR frame loss. Uses the reference frame in the previous
+ // GOP for concealment.
+ UnusedShortTermFrameNum = 0;
+ p_Vid->last_ref_pic_poc = -p_Vid->poc_gap;
+ p_Vid->earlier_missing_poc = 0;
+ }
+ else
+ UnusedShortTermFrameNum = (p_Vid->pre_frame_num + 1) % p_Vid->MaxFrameNum;
+
+ CurrFrameNum = p_Vid->frame_num;
+
+ // walk over every missing frame_num (modulo MaxFrameNum) up to, but not including, the current one
+ while (CurrFrameNum != UnusedShortTermFrameNum)
+ {
+ // NOTE(review): the result is used without a NULL check - presumably
+ // alloc_storable_picture does not return on failure; confirm.
+ picture = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr);
+
+ picture->coded_frame = 1;
+ picture->pic_num = UnusedShortTermFrameNum;
+ picture->frame_num = UnusedShortTermFrameNum;
+ picture->non_existing = 0;
+ picture->is_output = 0;
+ picture->used_for_reference = 1;
+ picture->concealed_pic = 1;
+
+ picture->adaptive_ref_pic_buffering_flag = 0;
+
+ // temporarily pretend the missing frame is the current one; restored after the loop
+ p_Vid->frame_num = UnusedShortTermFrameNum;
+
+ // the concealed picture's POC continues from the last reference POC in steps of ref_poc_gap
+ picture->top_poc=p_Vid->last_ref_pic_poc + p_Vid->ref_poc_gap;
+ picture->bottom_poc=picture->top_poc;
+ picture->frame_poc=picture->top_poc;
+ picture->poc=picture->top_poc;
+ p_Vid->last_ref_pic_poc = picture->poc;
+
+ // take over header fields from the last short-term reference picture in the DPB
+ copy_prev_pic_to_concealed_pic(picture, p_Vid);
+
+ //if (UnusedShortTermFrameNum == 0)
+ if(p_Vid->IDR_concealment_flag == 1)
+ {
+ // treat the concealed picture as an IDR picture: flush the DPB and restart the POC at 0
+ picture->slice_type = I_SLICE;
+ picture->idr_flag = TRUE;
+ flush_dpb(p_Vid);
+ picture->top_poc= 0;
+ picture->bottom_poc=picture->top_poc;
+ picture->frame_poc=picture->top_poc;
+ picture->poc=picture->top_poc;
+ p_Vid->last_ref_pic_poc = picture->poc;
+ }
+
+ store_picture_in_dpb(p_Vid, picture);
+
+ // NOTE(review): the pointer is dropped here; presumably the DPB owns the picture from now on - confirm
+ picture=NULL;
+
+ p_Vid->pre_frame_num = UnusedShortTermFrameNum;
+ UnusedShortTermFrameNum = (UnusedShortTermFrameNum + 1) % p_Vid->MaxFrameNum;
+
+ // update reference flags and set current flag.
+ // shifts the history by one entry; indices 1..16 are written, so ref_flag must
+ // hold at least 17 entries - TODO confirm against its declaration
+ for(i=16;i>0;i--)
+ {
+ p_Vid->ref_flag[i] = p_Vid->ref_flag[i-1];
+ }
+ p_Vid->ref_flag[0] = 0;
+ }
+ // restore the values that were temporarily overridden
+ p_Vid->delta_pic_order_cnt[0] = tmp1;
+ p_Vid->delta_pic_order_cnt[1] = tmp2;
+ p_Vid->frame_num = CurrFrameNum;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Rebuilds the fs_ref array of the decoded picture buffer for motion
+*    vector copy concealment of a lost non-reference frame: every used
+*    frame store flagged as concealment_reference is appended in buffer
+*    order. ref_frames_in_buffer is then set to
+*    active_pps->num_ref_idx_l0_active_minus1 (not to the number of
+*    entries collected).
+*
+************************************************************************
+*/
+
+void update_ref_list_for_concealment(DecodedPictureBuffer *p_Dpb)
+{
+  VideoParameters *p_Vid = p_Dpb->p_Vid;
+  unsigned src;
+  unsigned dst = 0;
+
+  for (src = 0; src < p_Dpb->used_size; src++)
+  {
+    if (!p_Dpb->fs[src]->concealment_reference)
+      continue;
+
+    p_Dpb->fs_ref[dst] = p_Dpb->fs[src];
+    dst++;
+  }
+
+  p_Dpb->ref_frames_in_buffer = p_Vid->active_pps->num_ref_idx_l0_active_minus1;
+}
+
+/*!
+************************************************************************
+* \brief
+* Initialize the list based on the B frame or non reference 'p' frame
+* to be concealed. The function initialize p_Vid->listX[0] and list 1 depending
+* on current picture type
+* Only frame stores flagged as concealment_reference are considered, and the
+* lists are only built when currPicStructure is FRAME.
+* Note that listXsize[1] is forced to 0 at the end, so every entry of
+* listX[1] is reset to NULL before the function returns.
+* \param p_Vid
+* video parameters holding the DPB, the active SPS and the lists to fill
+* \param currSliceType
+* P_SLICE or B_SLICE; other values build no list
+* \param currPicStructure
+* picture structure of the picture to be concealed
+*
+************************************************************************
+*/
+void init_lists_for_non_reference_loss(VideoParameters *p_Vid, int currSliceType, PictureStructure currPicStructure)
+{
+ DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ unsigned i;
+ int j;
+ int MaxFrameNum = 1 << (active_sps->log2_max_frame_num_minus4 + 4);
+ int diff;
+
+ int list0idx = 0;
+ int list0idx_1 = 0;
+
+ StorablePicture *tmp_s;
+
+ // Calculate FrameNumWrap and PicNum relative to the frame being concealed.
+ // NOTE(review): the loop tests fs[i] but writes fs_ref[i] with the same index, and
+ // it is bounded by ref_frames_in_buffer - confirm that both arrays line up here.
+ if (currPicStructure == FRAME)
+ {
+ for(i=0;i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if(p_Dpb->fs[i]->concealment_reference == 1)
+ {
+ if(p_Dpb->fs[i]->frame_num > p_Vid->frame_to_conceal)
+ p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs[i]->frame_num - MaxFrameNum;
+ else
+ p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs[i]->frame_num;
+ p_Dpb->fs_ref[i]->frame->pic_num = p_Dpb->fs_ref[i]->frame_num_wrap;
+ }
+ }
+ }
+
+ if (currSliceType == P_SLICE)
+ {
+ // collect all concealment references into list 0
+ if (currPicStructure == FRAME)
+ {
+ for(i=0;i<p_Dpb->used_size; i++)
+ {
+ if(p_Dpb->fs[i]->concealment_reference == 1)
+ {
+ p_Vid->listX[0][list0idx++] = p_Dpb->fs[i]->frame;
+ }
+ }
+ // order list 0 by PicNum
+ qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc);
+ p_Vid->listXsize[0] = list0idx;
+ }
+ }
+
+ if (currSliceType == B_SLICE)
+ {
+ if (currPicStructure == FRAME)
+ {
+ // first part of list 0: references with a POC below earlier_missing_poc, highest POC first
+ // for(i=0;i<p_Dpb->ref_frames_in_buffer; i++)
+ for(i=0;i<p_Dpb->used_size; i++)
+ {
+ if(p_Dpb->fs[i]->concealment_reference == 1)
+ {
+ if(p_Vid->earlier_missing_poc > p_Dpb->fs[i]->frame->poc)
+ p_Vid->listX[0][list0idx++] = p_Dpb->fs[i]->frame;
+ }
+ }
+
+ qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_poc_desc);
+ list0idx_1 = list0idx;
+
+ // second part of list 0: references with a POC above earlier_missing_poc, lowest POC first
+ // for(i=0;i<p_Dpb->ref_frames_in_buffer; i++)
+ for(i=0;i<p_Dpb->used_size; i++)
+ {
+ if(p_Dpb->fs[i]->concealment_reference == 1)
+ {
+ if(p_Vid->earlier_missing_poc < p_Dpb->fs[i]->frame->poc)
+ p_Vid->listX[0][list0idx++] = p_Dpb->fs[i]->frame;
+ }
+ }
+
+ qsort((void *)&p_Vid->listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc);
+
+ // list 1 holds the same pictures with the two parts swapped
+ for (j=0; j<list0idx_1; j++)
+ {
+ p_Vid->listX[1][list0idx-list0idx_1+j]=p_Vid->listX[0][j];
+ }
+ for (j=list0idx_1; j<list0idx; j++)
+ {
+ p_Vid->listX[1][j-list0idx_1]=p_Vid->listX[0][j];
+ }
+
+ p_Vid->listXsize[0] = p_Vid->listXsize[1] = list0idx;
+
+ // listXsize[0] was just set to list0idx, so both qsort calls below are given
+ // list0idx - listXsize[0] == 0 elements and sort nothing
+ qsort((void *)&p_Vid->listX[0][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+ qsort((void *)&p_Vid->listX[1][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+ p_Vid->listXsize[0] = p_Vid->listXsize[1] = list0idx;
+ }
+ }
+
+ // For P slices listXsize[1] is not assigned above, so this check reads whatever
+ // value listXsize[1] held on entry.
+ if ((p_Vid->listXsize[0] == p_Vid->listXsize[1]) && (p_Vid->listXsize[0] > 1))
+ {
+ // check if lists are identical, if yes swap first two elements of listX[1]
+ diff=0;
+ for (j = 0; j< p_Vid->listXsize[0]; j++)
+ {
+ if (p_Vid->listX[0][j]!=p_Vid->listX[1][j])
+ diff=1;
+ }
+ if (!diff)
+ {
+ tmp_s = p_Vid->listX[1][0];
+ p_Vid->listX[1][0]=p_Vid->listX[1][1];
+ p_Vid->listX[1][1]=tmp_s;
+ }
+ }
+
+ // set max size
+ p_Vid->listXsize[0] = imin (p_Vid->listXsize[0], (int)active_sps->num_ref_frames);
+ p_Vid->listXsize[1] = imin (p_Vid->listXsize[1], (int)active_sps->num_ref_frames);
+
+ // list 1 is discarded: with a size of 0 the second loop below clears all of listX[1]
+ p_Vid->listXsize[1] = 0;
+ // set the unused list entries to NULL
+ for (i=p_Vid->listXsize[0]; i< (MAX_LIST_SIZE) ; i++)
+ {
+ p_Vid->listX[0][i] = NULL;
+ }
+ for (i=p_Vid->listXsize[1]; i< (MAX_LIST_SIZE) ; i++)
+ {
+ p_Vid->listX[1][i] = NULL;
+ }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Searches the decoded picture buffer from its last used entry
+*    backwards for a frame store whose POC equals the POC to conceal from
+*    and returns its frame.
+*    In this version the POC to conceal from is always 0; the missingpoc
+*    argument is not used.
+* \param p_Vid
+*    video parameters giving access to the decoded picture buffer
+* \param missingpoc
+*    POC of the missing picture (currently ignored)
+* \param pos
+*    receives the DPB index of the entry found; untouched if none is found
+* \return
+*    the frame of the matching entry, or NULL if there is none
+*
+************************************************************************
+*/
+
+StorablePicture *get_pic_from_dpb(VideoParameters *p_Vid, int missingpoc, unsigned int *pos)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  const int concealfrom = 0;
+  int idx = p_Dpb->used_size - 1;
+
+  while (idx >= 0)
+  {
+    if (p_Dpb->fs[idx]->poc == concealfrom)
+    {
+      *pos = idx;
+      return p_Dpb->fs[idx]->frame;
+    }
+    idx--;
+  }
+
+  return NULL;
+}
+
+/*!
+************************************************************************
+* \brief
+*    qsort() comparison callback for arrays of int (used to sort the POC
+*    list in ascending order).
+*
+* \param i
+*    pointer to the first int
+* \param j
+*    pointer to the second int
+* \return
+*    -1, 0 or 1 when *i is less than, equal to or greater than *j.
+*    The result is computed with comparisons instead of a subtraction so
+*    that widely separated values (e.g. INT_MIN vs. INT_MAX) cannot
+*    overflow and flip the sign.
+************************************************************************
+*/
+
+int comp(const void *i, const void *j)
+{
+  const int lhs = *(const int *)i;
+  const int rhs = *(const int *)j;
+
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Allocate a zeroed concealment list node and fill it in.
+*
+* \param picture
+*    picture stored in the node (the pointer is stored, not copied)
+* \param missingpoc
+*    POC of the missing picture this node stands for
+* \return
+*    the new, unlinked node (next == NULL), or NULL if calloc() failed
+************************************************************************
+*/
+
+struct concealment_node * init_node( StorablePicture* picture, int missingpoc )
+{
+  struct concealment_node *node =
+    (struct concealment_node *) calloc( 1, sizeof(struct concealment_node ) );
+
+  if( node != NULL )
+  {
+    node->picture = picture;
+    node->missingpocs = missingpoc;
+    node->next = NULL;
+  }
+
+  return node;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Diagnostic helper: print the missing POC stored in one node to stdout.
+*
+* \param ptr
+*    node to print; it is dereferenced without a NULL check
+************************************************************************
+*/
+
+void print_node( struct concealment_node *ptr )
+{
+  const int missing_poc = ptr->missingpocs;
+
+  printf("Missing POC=%d\n", missing_poc);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Diagnostic helper: print every node from ptr to the end of the list,
+*    one node per line, via print_node(). A NULL ptr prints nothing.
+*
+* \param ptr
+*    first node to print
+************************************************************************
+*/
+
+void print_list( struct concealment_node *ptr )
+{
+  struct concealment_node *cursor;
+
+  for( cursor = ptr; cursor != NULL; cursor = cursor->next )
+    print_node( cursor );
+}
+
+/*!
+************************************************************************
+* \brief
+*    Append a node to the end of the concealment list kept in p_Vid
+*    (concealment_head / concealment_end).
+*
+* \param p_Vid
+*    decoder state owning the list
+* \param concealment_new
+*    node to append. A NULL node (e.g. a failed init_node()) is ignored;
+*    appending it would set concealment_end to NULL and make the next
+*    append dereference a NULL pointer.
+************************************************************************
+*/
+
+
+static void add_node( VideoParameters *p_Vid, struct concealment_node *concealment_new )
+{
+  if( concealment_new == NULL )
+    return;
+
+  if( p_Vid->concealment_head == NULL )
+  {
+    // empty list: the new node is both head and tail
+    p_Vid->concealment_end = p_Vid->concealment_head = concealment_new;
+    return;
+  }
+
+  p_Vid->concealment_end->next = concealment_new;
+  p_Vid->concealment_end = concealment_new;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Remove and free the first node of the concealment list.
+*    Only the head node can be deleted; any other ptr (including NULL)
+*    leaves the list unchanged. The picture referenced by the node is
+*    not freed here.
+*
+* \param p_Vid
+*    decoder state owning the list
+* \param ptr
+*    node to delete, expected to be p_Vid->concealment_head
+************************************************************************
+*/
+
+
+static void delete_node( VideoParameters *p_Vid, struct concealment_node *ptr )
+{
+  // The NULL test matters when the list is empty: head is NULL too, so
+  // "ptr == head" alone would go on to dereference a NULL head.
+  if( ptr == NULL || ptr != p_Vid->concealment_head )
+    return;
+
+  p_Vid->concealment_head = ptr->next;
+
+  // removing the only node also moves the tail pointer
+  if( p_Vid->concealment_end == ptr )
+    p_Vid->concealment_end = ptr->next;
+
+  free(ptr);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Free ptr and every node after it, and cut the list in front of ptr.
+*    If ptr is the head the list becomes empty; otherwise the node in
+*    front of ptr becomes the new tail and its next pointer is cleared,
+*    so the surviving list never points at freed memory.
+*    Only the nodes are freed, not the pictures they reference.
+*
+* \param p_Vid
+*    decoder state owning the list
+* \param ptr
+*    first node to delete. If ptr is not part of the list nothing is
+*    freed. A NULL ptr only re-synchronises concealment_end with the
+*    last node.
+************************************************************************
+*/
+
+void delete_list( VideoParameters *p_Vid, struct concealment_node *ptr )
+{
+  struct concealment_node *temp;
+
+  if( p_Vid->concealment_head == NULL ) return;
+
+  if( ptr == p_Vid->concealment_head )
+  {
+    p_Vid->concealment_head = NULL;
+    p_Vid->concealment_end = NULL;
+  }
+  else
+  {
+    temp = p_Vid->concealment_head;
+
+    // find the predecessor of ptr without running off the end of the list
+    while( temp->next != NULL && temp->next != ptr )
+      temp = temp->next;
+
+    if( temp->next != ptr )
+      return; // ptr is not a member of this list
+
+    temp->next = NULL;
+    p_Vid->concealment_end = temp;
+  }
+
+  while( ptr != NULL )
+  {
+    temp = ptr->next;
+    free( ptr );
+    ptr = temp;
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Detects missing non reference frames and stores concealed
+*    replacements in the concealment buffer (a singly linked list).
+*    The POCs recorded in p_Vid->pocs_in_dpb are sorted; wherever two
+*    neighbouring POCs are more than p_Vid->poc_gap apart, a picture with
+*    POC = lower POC + poc_gap is treated as missing. Each missing POC is
+*    concealed only once (it must exceed p_Vid->earlier_missing_poc).
+*
+*    p_Dpb->used_size is modified while concealing and restored to its
+*    entry value before returning.
+*
+* \param p_Vid
+*    decoder state
+* \param diff
+*    number of trailing entries of the sorted POC array left out of the
+*    gap scan (the loop runs over size - diff entries)
+************************************************************************
+*/
+// TODO: benski> pass timecode
+void conceal_non_ref_pics(VideoParameters *p_Vid, int diff)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  int missingpoc = 0;
+  unsigned int i, pos = 0;
+  StorablePicture *conceal_from_picture = NULL;
+  StorablePicture *conceal_to_picture = NULL;
+  struct concealment_node *concealment_ptr = NULL;
+  int temp_used_size = p_Dpb->used_size;
+
+  if(p_Dpb->used_size == 0 )
+    return;
+
+  qsort(p_Vid->pocs_in_dpb, p_Dpb->size, sizeof(int), comp);
+
+  for(i=0;i<p_Dpb->size-diff;i++)
+  {
+    p_Dpb->used_size = p_Dpb->size;
+    if((p_Vid->pocs_in_dpb[i+1] - p_Vid->pocs_in_dpb[i]) > p_Vid->poc_gap)
+    {
+      missingpoc = p_Vid->pocs_in_dpb[i] + p_Vid->poc_gap;
+      // Diagnostics
+      // printf("\n missingpoc = %d\n",missingpoc);
+
+      if(missingpoc > p_Vid->earlier_missing_poc)
+      {
+        // Allocate the destination only when it is really going to be
+        // used; allocating before this test leaked one picture for every
+        // gap that had already been concealed.
+        conceal_to_picture = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr);
+
+        p_Vid->earlier_missing_poc = missingpoc;
+        conceal_to_picture->top_poc= missingpoc;
+        conceal_to_picture->bottom_poc=missingpoc;
+        conceal_to_picture->frame_poc=missingpoc;
+        conceal_to_picture->poc=missingpoc;
+        conceal_from_picture = get_pic_from_dpb(p_Vid, missingpoc, &pos);
+
+        assert(conceal_from_picture != NULL);
+
+        // limit the dpb view to the entries up to the source picture
+        p_Dpb->used_size = pos+1;
+
+        p_Vid->frame_to_conceal = conceal_from_picture->frame_num + 1;
+
+        update_ref_list_for_concealment(p_Dpb);
+        p_Vid->conceal_slice_type = B_SLICE;
+        copy_to_conceal(conceal_from_picture, conceal_to_picture, p_Vid);
+        concealment_ptr = init_node( conceal_to_picture, missingpoc );
+        // init_node() returns NULL when out of memory; never link a NULL node
+        if(concealment_ptr != NULL)
+          add_node(p_Vid, concealment_ptr);
+        // Diagnostics
+        // print_node(concealment_ptr);
+      }
+    }
+  }
+
+  //restore the original value
+  p_Dpb->used_size = temp_used_size;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Sliding window bookkeeping for the POCs recorded in
+*    p_Vid->pocs_in_dpb. When the dpb is full (used_size == size) every
+*    entry is moved one slot towards index 0, dropping the first entry.
+*    Nothing happens while the dpb still has free slots.
+*
+* \note
+*    The POC of the new picture is not written here; p is not read.
+*
+* \param p_Dpb
+*    decoded picture buffer
+* \param p
+*    picture being inserted (unused)
+************************************************************************
+*/
+
+void sliding_window_poc_management(DecodedPictureBuffer *p_Dpb, StorablePicture *p)
+{
+  VideoParameters *p_Vid;
+  unsigned int slot;
+
+  if (p_Dpb->used_size != p_Dpb->size)
+    return;
+
+  p_Vid = p_Dpb->p_Vid;
+  slot = 0;
+  while (slot < p_Dpb->size-1)
+  {
+    p_Vid->pocs_in_dpb[slot] = p_Vid->pocs_in_dpb[slot+1];
+    slot++;
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Outputs a concealed non reference frame. When poc is positive and
+*    lies more than p_Vid->poc_gap after the last output POC of the dpb,
+*    the picture at the head of the concealment list is written through
+*    write_stored_frame() and its list node is deleted.
+*
+* \param p_Vid
+*    decoder state
+* \param poc
+*    POC used for the gap test against p_Dpb->last_output_poc
+************************************************************************
+*/
+
+void write_lost_non_ref_pic(VideoParameters *p_Vid, int poc)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  // zero-initialised so that no field of the temporary store is left
+  // indeterminate when it is handed to write_stored_frame()
+  FrameStore concealment_fs = {0};
+
+  if(!(poc > 0))
+    return;
+
+  if(!((poc - p_Dpb->last_output_poc) > p_Vid->poc_gap))
+    return;
+
+  // nothing has been concealed yet: there is no picture to write
+  if(p_Vid->concealment_head == NULL)
+    return;
+
+  concealment_fs.frame = p_Vid->concealment_head->picture;
+  concealment_fs.is_output = 0;
+  concealment_fs.is_reference = 0;
+  concealment_fs.is_used = 3;
+
+  write_stored_frame(p_Vid, &concealment_fs);
+  delete_node(p_Vid, p_Vid->concealment_head);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Conceals frame loss immediately after the IDR. The frame stored at
+*    dpb position pos is copied into p_Vid->last_out_fs->frame with
+*    copy_to_conceal(); the destination picture is allocated first if it
+*    does not exist yet (and last_out_fs is then marked is_used = 3).
+*    This special case produces the same result for either frame copy or
+*    motion vector copy concealment.
+*
+* \param p_Vid
+*    decoder state
+* \param pos
+*    index into p_Dpb->fs of the frame to copy from
+************************************************************************
+*/
+// TODO: benski> pass timecode
+void write_lost_ref_after_idr(VideoParameters *p_Vid, int pos)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+
+  if(p_Vid->last_out_fs->frame == NULL)
+  {
+    p_Vid->last_out_fs->frame = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height,
+      p_Vid->width_cr, p_Vid->height_cr);
+    p_Vid->last_out_fs->is_used = 3;
+  }
+
+  copy_to_conceal(p_Dpb->fs[pos]->frame, p_Vid->last_out_fs->frame, p_Vid);
+}
+
diff --git a/Src/h264dec/ldecod/src/errorconcealment.c b/Src/h264dec/ldecod/src/errorconcealment.c
new file mode 100644
index 00000000..6b1b47bf
--- /dev/null
+++ b/Src/h264dec/ldecod/src/errorconcealment.c
@@ -0,0 +1,138 @@
+
+/*!
+ ***********************************************************************
+ * \file errorconcealment.c
+ *
+ * \brief
+ * Implements error concealment scheme for H.264 decoder
+ *
+ * \date
+ * 6.10.2000
+ *
+ * \version
+ * 1.0
+ *
+ * \note
+ * This simple error concealment implemented in this decoder uses
+ * the existing dependencies of syntax elements.
+ * If an element is detected as faulty, this element and all
+ * dependent elements are marked as elements to conceal in the p_Vid->ec_flag[]
+ * array. When the decoder requests a new element through the function
+ * readSyntaxElement_xxxx(), this array is checked first to see whether error
+ * concealment has to be applied to this element.
+ * If an error occurred, a concealed element is given to the
+ * decoding function in macroblock().
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de>
+ ***********************************************************************
+ */
+
+#include "contributors.h"
+#include "global.h"
+#include "elements.h"
+
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ * Set the concealment flag for a syntax element and for all syntax
+ * elements that depend on it.
+ * The first switch marks header/type/motion elements and redirects se
+ * to the matching texture group; the second switch marks the texture
+ * elements. Both switches rely on case fall-through, so the order of
+ * the case labels is significant.
+ * \param p_Vid
+ * decoder state holding the ec_flag[] array
+ * \param se
+ * type of syntax element to conceal
+ * \return
+ * EC_REQ in every case: elements of the same type or of a depending
+ * type need error concealment.
+ ***********************************************************************
+ */
+int set_ec_flag(VideoParameters *p_Vid, int se)
+{
+
+ /*
+ if (p_Vid->ec_flag[se] == NO_EC)
+ printf("Error concealment on element %s\n",SEtypes[se]);
+ */
+ switch (se)
+ {
+ case SE_HEADER :
+ p_Vid->ec_flag[SE_HEADER] = EC_REQ; /* fall through */
+ case SE_PTYPE :
+ p_Vid->ec_flag[SE_PTYPE] = EC_REQ; /* fall through */
+ case SE_MBTYPE :
+ p_Vid->ec_flag[SE_MBTYPE] = EC_REQ; /* fall through */
+
+ case SE_REFFRAME :
+ p_Vid->ec_flag[SE_REFFRAME] = EC_REQ;
+ p_Vid->ec_flag[SE_MVD] = EC_REQ; // set all motion vectors to zero length
+ se = SE_CBP_INTER; // conceal also Inter texture elements
+ break;
+
+ case SE_INTRAPREDMODE :
+ p_Vid->ec_flag[SE_INTRAPREDMODE] = EC_REQ;
+ se = SE_CBP_INTRA; // conceal also Intra texture elements
+ break;
+ case SE_MVD :
+ p_Vid->ec_flag[SE_MVD] = EC_REQ;
+ se = SE_CBP_INTER; // conceal also Inter texture elements
+ break;
+
+ default:
+ break; // texture and delta-quant elements are handled by the second switch
+ }
+
+ switch (se)
+ {
+ case SE_CBP_INTRA :
+ p_Vid->ec_flag[SE_CBP_INTRA] = EC_REQ; /* fall through */
+ case SE_LUM_DC_INTRA :
+ p_Vid->ec_flag[SE_LUM_DC_INTRA] = EC_REQ; /* fall through */
+ case SE_CHR_DC_INTRA :
+ p_Vid->ec_flag[SE_CHR_DC_INTRA] = EC_REQ; /* fall through */
+ case SE_LUM_AC_INTRA :
+ p_Vid->ec_flag[SE_LUM_AC_INTRA] = EC_REQ; /* fall through */
+ case SE_CHR_AC_INTRA :
+ p_Vid->ec_flag[SE_CHR_AC_INTRA] = EC_REQ;
+ break;
+
+ case SE_CBP_INTER :
+ p_Vid->ec_flag[SE_CBP_INTER] = EC_REQ; /* fall through */
+ case SE_LUM_DC_INTER :
+ p_Vid->ec_flag[SE_LUM_DC_INTER] = EC_REQ; /* fall through */
+ case SE_CHR_DC_INTER :
+ p_Vid->ec_flag[SE_CHR_DC_INTER] = EC_REQ; /* fall through */
+ case SE_LUM_AC_INTER :
+ p_Vid->ec_flag[SE_LUM_AC_INTER] = EC_REQ; /* fall through */
+ case SE_CHR_AC_INTER :
+ p_Vid->ec_flag[SE_CHR_AC_INTER] = EC_REQ;
+ break;
+ case SE_DELTA_QUANT_INTER :
+ p_Vid->ec_flag[SE_DELTA_QUANT_INTER] = EC_REQ;
+ break;
+ case SE_DELTA_QUANT_INTRA :
+ p_Vid->ec_flag[SE_DELTA_QUANT_INTRA] = EC_REQ;
+ break;
+ default:
+ break; // any other element type: no texture flag to set
+
+ }
+ return EC_REQ;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Clear all error concealment flags: every entry of p_Vid->ec_flag[]
+ *    (SE_MAX_ELEMENTS entries) is set to NO_EC.
+ *    Called at the start of each slice.
+ * \param p_Vid
+ *    decoder state holding the ec_flag[] array
+ ***********************************************************************
+ */
+void reset_ec_flags(VideoParameters *p_Vid)
+{
+  int se = 0;
+
+  while (se < SE_MAX_ELEMENTS)
+  {
+    p_Vid->ec_flag[se] = NO_EC;
+    ++se;
+  }
+}
+
diff --git a/Src/h264dec/ldecod/src/filter_chroma_horiz.c b/Src/h264dec/ldecod/src/filter_chroma_horiz.c
new file mode 100644
index 00000000..dbed3e15
--- /dev/null
+++ b/Src/h264dec/ldecod/src/filter_chroma_horiz.c
@@ -0,0 +1,533 @@
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+#include "loopfilter.h"
+#include <mmintrin.h>
+#include <emmintrin.h>
+
+static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; // Alpha threshold, looked up with indexA (QP + alpha offset clipped to 0..MAX_QP)
+static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; // Beta threshold, looked up with indexB (QP + beta offset clipped to 0..MAX_QP)
+static const byte CLIP_TAB[52][5] = // clipping bounds: row selected with indexA, column with the edge strength (0..4)
+{
+ { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},
+ { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},
+ { -1, 0, 0, 0, 0},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 1, 1, 1, 1},
+ { -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 2, 3, 3},
+ { -1, 1, 2, 3, 3},{ -1, 2, 2, 3, 3},{ -1, 2, 2, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 3, 3, 5, 5},{ -1, 3, 4, 6, 6},{ -1, 3, 4, 6, 6},
+ { -1, 4, 5, 7, 7},{ -1, 4, 5, 8, 8},{ -1, 4, 6, 9, 9},{ -1, 5, 7,10,10},{ -1, 6, 8,11,11},{ -1, 6, 8,13,13},{ -1, 7,10,14,14},{ -1, 8,11,16,16},
+ { -1, 9,12,18,18},{ -1,10,13,20,20},{ -1,11,15,23,23},{ -1,13,17,25,25}
+}; // NOTE(review): column 0 (strength 0) is written as -1 in a `byte` table; if byte is unsigned this stores 255 - confirm. FilterChroma8_Horiz_sse reads the table through a char pointer and adds 1.
+
+static const int pelnum_cr[2][4] = {{0,8,16,16}, {0,8, 8,16}}; //[dir:0=vert, 1=hor.][yuv_format] number of chroma samples along one macroblock edge
+
+#define LOAD_LINE_EPI16(reg, ptr) { reg = _mm_loadl_epi64((__m128i *)(ptr)); reg = _mm_unpacklo_epi8(reg, xmm_zero); } // loads 8 bytes from ptr and zero-extends them to eight 16-bit lanes; needs an all-zero __m128i named xmm_zero in scope at the point of use
+static void FilterChroma8_Horiz_sse2(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, const byte Strength[16], const byte *ClipTab, int Alpha, int Beta, int bitdepth_scale, int max_imgpel_value)
+{ /* SSE2 version of FilterChroma8_Horiz: filters 8 neighbouring samples
+   * across one horizontal edge in a single pass.
+   *   SrcPtrP  row on the P side of the edge (L0); L1 is read at SrcPtrP - inc_dim
+   *   SrcPtrQ  row on the Q side of the edge (R0); R1 is read at SrcPtrQ + inc_dim
+   *   Strength edge strengths; loaded with an aligned 16-byte load
+   *   ClipTab  clipping table row, indexed by strength (normal filter only)
+   * Samples are loaded as bytes, widened to 16 bit and packed back with
+   * unsigned saturation, i.e. the result is clamped to 0..255;
+   * max_imgpel_value is not referenced in this function.
+   * (assumes imgpel is one byte wide - TODO confirm)
+   * Returns early, without writing, as soon as no lane passes a test.
+   */
+ __m128i xmm_L1, xmm_L0, xmm_R0, xmm_R1;
+ __m128i xmm_strength;
+ __m128i xmm_absdiff, xmm_diff, xmm_acc;
+ __m128i xmm_127, xmm_zero;
+ __m128i xmm_alpha, xmm_beta;
+
+ int match;
+ xmm_zero = _mm_setzero_si128();
+ xmm_strength = _mm_load_si128((__m128i *)Strength); // aligned load: Strength must be 16-byte aligned
+ xmm_127 = _mm_set1_epi8(127);
+ xmm_strength = _mm_adds_epu8(xmm_strength, xmm_127); // any non-zero strength byte now has its top bit set
+ xmm_strength = _mm_srai_epi16(xmm_strength, 15); // shift so it's all 0xFFFF or 0x0000 (the sign bit of each 16-bit lane comes from the odd-indexed byte of the pair)
+
+ LOAD_LINE_EPI16(xmm_R0, SrcPtrQ);
+ LOAD_LINE_EPI16(xmm_L0, SrcPtrP);
+
+ xmm_alpha = _mm_set1_epi16((uint16_t)Alpha);
+
+ // if ( abs( R0 - L0 ) < Alpha )
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0); // saturating a-b and b-a: one of them is 0, the other |a-b|
+ xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_R0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff); // xmm_strength accumulates the per-lane "filter this sample" mask
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+ LOAD_LINE_EPI16(xmm_R1, SrcPtrQ+inc_dim);
+
+ xmm_beta = _mm_set1_epi16((uint16_t)Beta);
+
+ // if ( abs(R0 - R1) < Beta )
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R1);
+ xmm_absdiff =_mm_subs_epu16(xmm_R1, xmm_R0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+ LOAD_LINE_EPI16(xmm_L1, SrcPtrP-inc_dim);
+
+ // if ( abs(L0 - L1) < Beta )
+ xmm_diff=_mm_subs_epu16(xmm_L0, xmm_L1);
+ xmm_absdiff =_mm_subs_epu16(xmm_L1, xmm_L0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+ if (Strength[0] == 4) // if strong filter is in use, ALL strengths will be 4
+ {
+ // *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+ __m128i xmm_2 = _mm_set1_epi16(2);
+
+ xmm_acc = xmm_L1;
+ xmm_acc = _mm_slli_epi16(xmm_acc, 1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_L0);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_R1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_2);
+ xmm_acc = _mm_srai_epi16(xmm_acc, 2);
+ xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); // keep the filtered value in selected lanes ...
+ xmm_L0 = _mm_andnot_si128(xmm_strength, xmm_L0); // ... and the original value in all other lanes
+ xmm_L0 = _mm_or_si128(xmm_L0, xmm_acc);
+ xmm_L0 = _mm_packus_epi16(xmm_L0, xmm_L0);
+ _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_L0);
+
+ // *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+ xmm_acc = xmm_R1;
+ xmm_acc = _mm_slli_epi16(xmm_acc, 1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_R0);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_L1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_2);
+ xmm_acc = _mm_srai_epi16(xmm_acc, 2);
+ xmm_acc = _mm_and_si128(xmm_acc, xmm_strength);
+ xmm_R0 = _mm_andnot_si128(xmm_strength, xmm_R0);
+ xmm_R0 = _mm_or_si128(xmm_R0, xmm_acc);
+ xmm_R0 = _mm_packus_epi16(xmm_R0, xmm_R0);
+ _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_R0);
+ }
+ else
+ {
+ int C0 = ClipTab[ Strength[0] ] * bitdepth_scale + 1; // one clip bound per pair of samples, taken from Strength[0], [4], [8], [12]
+ int C1 = ClipTab[ Strength[4] ] * bitdepth_scale + 1;
+ int C2 = ClipTab[ Strength[8] ] * bitdepth_scale + 1;
+ int C3 = ClipTab[ Strength[12] ] * bitdepth_scale + 1;
+ __m128i xmm_tc0 = _mm_setr_epi16(C0, C0, C1, C1, C2, C2, C3, C3); // TODO: benski> probably a better way to do this.
+ __m128i xmm_negative_tc0 = _mm_sub_epi16(xmm_zero, xmm_tc0);
+ __m128i xmm_4 = _mm_set1_epi16(4);
+ //int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+ xmm_acc = xmm_R0;
+ xmm_acc = _mm_sub_epi16(xmm_acc, xmm_L0);
+ xmm_acc = _mm_slli_epi16(xmm_acc, 2);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_L1);
+ xmm_acc = _mm_sub_epi16(xmm_acc, xmm_R1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_4);
+ xmm_acc = _mm_srai_epi16(xmm_acc, 3);
+ xmm_acc = _mm_min_epi16(xmm_acc, xmm_tc0);
+ xmm_acc = _mm_max_epi16(xmm_acc, xmm_negative_tc0);
+ xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); // dif becomes 0 in lanes that must stay unchanged
+
+ // *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif) ;
+ xmm_L0 = _mm_add_epi16(xmm_L0, xmm_acc);
+ xmm_L0 = _mm_packus_epi16(xmm_L0, xmm_L0); // the clip is done by the saturating pack (0..255)
+ _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_L0);
+
+ // *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif) ;
+ xmm_R0 = _mm_sub_epi16(xmm_R0, xmm_acc);
+ xmm_R0 = _mm_packus_epi16(xmm_R0, xmm_R0);
+ _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_R0);
+ }
+
+
+}
+
+static void IntraStrongFilter_Chroma8_Horiz_YUV420_sse2(int inc_dim, imgpel *SrcPtrP, int Alpha, int Beta)
+{ /* Strong (strength 4) chroma filter for 8 neighbouring samples across a
+   * horizontal edge, SSE2 version. All rows are addressed relative to
+   * SrcPtrP with the row stride inc_dim:
+   *   L1 = SrcPtrP - inc_dim,  L0 = SrcPtrP,
+   *   R0 = SrcPtrP + inc_dim,  R1 = SrcPtrP + 2*inc_dim
+   * There is no strength input: every lane that passes the Alpha and Beta
+   * tests is filtered. Returns early, without writing, as soon as no lane
+   * passes a test.
+   * (assumes imgpel is one byte wide - TODO confirm)
+   */
+ __m128i xmm_L1, xmm_L0, xmm_R0, xmm_R1;
+ __m128i xmm_strength;
+ __m128i xmm_absdiff, xmm_diff, xmm_acc;
+ __m128i xmm_zero;
+ __m128i xmm_alpha, xmm_beta;
+__m128i xmm_2;
+
+ int match;
+ xmm_zero = _mm_setzero_si128();
+
+ LOAD_LINE_EPI16(xmm_L0, SrcPtrP);
+ LOAD_LINE_EPI16(xmm_R0, SrcPtrP+inc_dim);
+
+ xmm_alpha = _mm_set1_epi16((uint16_t)Alpha);
+
+ // if ( abs( R0 - L0 ) < Alpha )
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0); // saturating a-b and b-a: one of them is 0, the other |a-b|
+ xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_R0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_strength = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha); // xmm_strength is the per-lane "filter this sample" mask
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+ LOAD_LINE_EPI16(xmm_R1, SrcPtrP+2*inc_dim);
+
+ xmm_beta = _mm_set1_epi16((uint16_t)Beta);
+
+ // if ( abs(R0 - R1) < Beta )
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R1);
+ xmm_absdiff =_mm_subs_epu16(xmm_R1, xmm_R0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+ LOAD_LINE_EPI16(xmm_L1, SrcPtrP-inc_dim);
+
+ // if ( abs(L0 - L1) < Beta )
+ xmm_diff=_mm_subs_epu16(xmm_L0, xmm_L1);
+ xmm_absdiff =_mm_subs_epu16(xmm_L1, xmm_L0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+
+ // *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+ xmm_2 = _mm_set1_epi16(2);
+
+ xmm_acc = xmm_L1;
+ xmm_acc = _mm_slli_epi16(xmm_acc, 1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_L0);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_R1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_2);
+ xmm_acc = _mm_srai_epi16(xmm_acc, 2);
+ xmm_acc = _mm_and_si128(xmm_acc, xmm_strength); // keep the filtered value in selected lanes ...
+ xmm_L0 = _mm_andnot_si128(xmm_strength, xmm_L0); // ... and the original value in all other lanes
+ xmm_L0 = _mm_or_si128(xmm_L0, xmm_acc);
+ xmm_L0 = _mm_packus_epi16(xmm_L0, xmm_L0);
+ _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_L0);
+
+ // *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+ xmm_acc = xmm_R1;
+ xmm_acc = _mm_slli_epi16(xmm_acc, 1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_R0);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_L1);
+ xmm_acc = _mm_add_epi16(xmm_acc, xmm_2);
+ xmm_acc = _mm_srai_epi16(xmm_acc, 2);
+ xmm_acc = _mm_and_si128(xmm_acc, xmm_strength);
+ xmm_R0 = _mm_andnot_si128(xmm_strength, xmm_R0);
+ xmm_R0 = _mm_or_si128(xmm_R0, xmm_acc);
+ xmm_R0 = _mm_packus_epi16(xmm_R0, xmm_R0);
+ _mm_storel_epi64((__m128i *)(SrcPtrP+inc_dim), xmm_R0); // the Q row is the row directly after SrcPtrP
+
+
+
+}
+
+
+// Scalar reference filter for 8 neighbouring chroma samples across one
+// horizontal edge (kept as a separate function to make it easier to unit test).
+//   inc_dim   row stride; L1 is read at SrcPtrP - inc_dim, R1 at SrcPtrQ + inc_dim
+//   SrcPtrP   first sample on the P side of the edge (L0)
+//   SrcPtrQ   first sample on the Q side of the edge (R0)
+//   Strength  edge strengths; sample k uses entry ((k >> 1) << 2) + (k & 1)
+//   ClipTab   clipping table row, indexed by strength
+// A sample pair is only modified when its strength is non-zero and the
+// Alpha/Beta difference tests all pass.
+static void FilterChroma8_Horiz(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, const byte Strength[16], const byte *ClipTab, int Alpha, int Beta, int bitdepth_scale, int max_imgpel_value)
+{
+  int k;
+
+  for( k = 0 ; k < 8 ; ++k, SrcPtrP++, SrcPtrQ++ )
+  {
+    const int bs = Strength[(((k >> 1) << 2) + (k & 0x01))];
+    imgpel L0, R0, L1, R1;
+
+    if( bs == 0 )
+      continue;
+
+    L0 = *SrcPtrP;
+    R0 = *SrcPtrQ;
+    if ( abs( R0 - L0 ) >= Alpha )
+      continue;
+
+    R1 = *(SrcPtrQ + inc_dim);
+    if ( abs(R0 - R1) >= Beta )
+      continue;
+
+    L1 = *(SrcPtrP - inc_dim);
+    if ( abs(L0 - L1) >= Beta )
+      continue;
+
+    if( bs == 4 ) // INTRA strong filtering
+    {
+      *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+      *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+    }
+    else
+    {
+      int tc0 = ClipTab[ bs ] * bitdepth_scale + 1;
+      int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+
+      *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif) ;
+      *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif) ;
+    }
+  }
+}
+
+// Deblocks one horizontal chroma edge (dir == 1) of macroblock MbQ.
+//   image     chroma plane to filter in place
+//   Strength  edge strengths for this edge
+//   edge      edge position inside the macroblock; the P row is edge - 1
+//             (row 0 is used when edge >= 16)
+//   uv        chroma component index (selects qpc[uv] and max_pel_value_comp[uv + 1])
+//   p         current picture, supplies chroma_format_idc
+// Nothing is filtered when the neighbour is unavailable while
+// DFDisableIdc != 0, or when Alpha or Beta is 0. An edge of 8 samples is
+// handed to FilterChroma8_Horiz_sse2() or FilterChroma8_Horiz() depending
+// on sse2_flag; any other width is filtered sample by sample here.
+void EdgeLoopChromaNormal_Horiz(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p)
+{
+  imgpel** Img = image->img;
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  int yQ = (edge < 16 ? edge - 1: 0);
+  PixelPos pixP, pixQ;
+  Macroblock *MbP;
+  const byte *ClipTab;
+  int bitdepth_scale, max_imgpel_value;
+  int QP, indexA, indexB, Alpha, Beta;
+  int PelNum, inc_dim, pel;
+
+  p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_CHROMA], &pixP);
+
+  if (!pixP.available && (MbQ->DFDisableIdc != 0))
+    return;
+
+  bitdepth_scale = p_Vid->bitdepth_scale[IS_CHROMA];
+  max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1];
+  MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+
+  // Average QP of the two blocks
+  QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1;
+
+  indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+  indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+
+  Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+  Beta = BETA_TABLE [indexB] * bitdepth_scale;
+
+  if (Alpha == 0 || Beta == 0)
+    return;
+
+  PelNum = pelnum_cr[1][p->chroma_format_idc];
+  ClipTab = CLIP_TAB[indexA];
+  inc_dim = image->stride;
+
+  // the Q row is the row after the P row
+  p_Vid->getNeighbour0X(MbQ, ++yQ, p_Vid->mb_size[IS_CHROMA], &pixQ);
+
+  if (PelNum == 8)
+  {
+    imgpel *SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+    imgpel *SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+
+    if (sse2_flag)
+      FilterChroma8_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, Strength, ClipTab, Alpha, Beta, bitdepth_scale, max_imgpel_value);
+    else
+      FilterChroma8_Horiz(inc_dim, SrcPtrP, SrcPtrQ, Strength, ClipTab, Alpha, Beta, bitdepth_scale, max_imgpel_value);
+    return;
+  }
+
+  // generic path: PelNum is not 8 here, so the strength index is simply pel
+  for( pel = 0 ; pel < PelNum ; ++pel, pixP.pos_x++, pixQ.pos_x++ )
+  {
+    int Strng = Strength[pel];
+    imgpel *SrcPtrP, *SrcPtrQ;
+    imgpel L0, R0, L1, R1;
+
+    if( Strng == 0 )
+      continue;
+
+    SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+    L0 = *SrcPtrP;
+    SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+    R0 = *SrcPtrQ;
+
+    if ( abs( R0 - L0 ) >= Alpha )
+      continue;
+
+    R1 = *(SrcPtrQ + inc_dim);
+    if ( abs(R0 - R1) >= Beta )
+      continue;
+
+    L1 = *(SrcPtrP - inc_dim);
+    if ( abs(L0 - L1) >= Beta )
+      continue;
+
+    if( Strng == 4 ) // INTRA strong filtering
+    {
+      *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+      *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+    }
+    else
+    {
+      int tc0 = ClipTab[ Strng ] * bitdepth_scale + 1;
+      int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+
+      *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif) ;
+      *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif) ;
+    }
+  }
+}
+
+
+static void FilterChroma8_Horiz_sse(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const char *ClipTab)
+{ /* Normal (non-strong) chroma filter for 8 neighbouring samples across a
+   * horizontal edge, using 64-bit MMX registers. The 8 samples are
+   * processed in two stages of 4 (i = 0 and i = 1); stage i uses
+   * Strength[i*2] for its first two samples and Strength[i*2+1] for the
+   * other two. Rows are addressed relative to SrcPtrP with stride p_step:
+   *   L1 = SrcPtrP - p_step,  L0 = SrcPtrP,
+   *   R0 = SrcPtrP + p_step,  R1 = SrcPtrP + 2*p_step
+   * A stage is skipped when both of its strengths are 0 or when no sample
+   * passes the Alpha/Beta tests. Four samples are read and written at a
+   * time through int pointers.
+   * (assumes imgpel is one byte wide - TODO confirm)
+   * NOTE(review): no _mm_empty() is issued in this function; presumably
+   * the MMX state is cleared elsewhere - confirm.
+   */
+ __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1);
+ __m64 mmx_zero = _mm_setzero_si64(), mmx_four=_mm_set1_pi16(4);
+ __m64 mmx_minus_one;
+ __m64 mmx_absdiff, mmx_diff;
+ __m64 mmx_L0, mmx_L1;
+ __m64 mmx_R0, mmx_R1;
+ __m64 mmx_C0, mmx_negative_C0, mmx_dif, mmx_match;
+ int match;
+ int i=0; // stage index: 0 = samples 0..3, 1 = samples 4..7
+
+ mmx_minus_one = _mm_set1_pi32(-1);
+
+ STAGE:
+
+ while (!Strength[i*2] && !Strength[i*2+1]) // skip stages whose two strengths are both 0
+ {
+ SrcPtrP += 4;
+ if (i++ == 1) // last stage
+ return;
+ }
+
+ mmx_L0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP));
+ mmx_R0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+p_step));
+
+ // abs( R0 - L0 ) < Alpha
+ // MMX doesn't have unsigned compare, so we have to go to short
+ mmx_L0 = _mm_unpacklo_pi8(mmx_L0, mmx_zero);
+ mmx_R0 = _mm_unpacklo_pi8(mmx_R0, mmx_zero);
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0); // saturating a-b and b-a: one of them is 0, the other |a-b|
+ mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_match = _mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do
+ mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one); // invert: 1's now mark the words where |R0-L0| < Alpha
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+ SrcPtrP += 4;
+ goto STAGE; // start the process over from next position
+ }
+
+ // abs( R0 - R1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_R0 already populated
+ mmx_R1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+2*p_step));
+ mmx_R1 = _mm_unpacklo_pi8(mmx_R1, mmx_zero);
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1);
+ mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+ SrcPtrP += 4;
+ goto STAGE; // start the process over from next position
+ }
+
+ // abs(L0 - L1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_L0 already populated
+ mmx_L1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP-p_step));
+ mmx_L1 = _mm_unpacklo_pi8(mmx_L1, mmx_zero);
+ mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1);
+ mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+SrcPtrP += 4;
+ goto STAGE; // start the process over from next position
+ }
+
+ // ok, now time to perform the actual calculation. hope it was worth it!!
+
+ // tc0 = ClipTab[ Strng ] + 1
+ mmx_C0 = _mm_setr_pi16(ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2+1]]+1, ClipTab[Strength[i*2+1]]+1); // NOTE(review): mmx_match carries no strength term; a strength-0 half presumably relies on ClipTab[0] reading as -1 (signed char) so that its bound becomes 0 - confirm
+ mmx_negative_C0 = _mm_sub_pi16(mmx_zero, mmx_C0);
+
+ // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+ mmx_dif = mmx_R0;
+ mmx_dif = _mm_sub_pi16(mmx_dif, mmx_L0);
+ mmx_dif = _mm_slli_pi16(mmx_dif, 2);
+ mmx_dif = _mm_add_pi16(mmx_dif, mmx_L1);
+ mmx_dif = _mm_sub_pi16(mmx_dif, mmx_R1);
+ mmx_dif = _mm_add_pi16(mmx_dif, mmx_four);
+ mmx_dif = _mm_srai_pi16(mmx_dif, 3);
+ mmx_dif = _mm_min_pi16(mmx_dif, mmx_C0);
+ mmx_dif = _mm_max_pi16(mmx_dif, mmx_negative_C0);
+ mmx_dif = _mm_and_si64(mmx_dif, mmx_match); // dif becomes 0 for samples that failed a test
+
+ // L0 = (imgpel) iClip1(max_imgpel_value, L0 + dif);
+ mmx_L0 = _mm_add_pi16(mmx_L0, mmx_dif);
+
+ // R0 = (imgpel) iClip1(max_imgpel_value, R0 - dif);
+ mmx_R0 = _mm_sub_pi16(mmx_R0, mmx_dif);
+
+ // store
+ mmx_R0 = _mm_packs_pu16(mmx_R0, mmx_R0); // the clip is done by the unsigned saturating pack (0..255)
+ mmx_L0 = _mm_packs_pu16(mmx_L0, mmx_L0);
+
+ *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_L0);
+ *(int *)(SrcPtrP+p_step) = _mm_cvtsi64_si32(mmx_R0);
+
+ if (i++ == 1)
+ return;
+
+ SrcPtrP += 4;
+ goto STAGE; // next stage
+}
+
+
+// Deblocks one horizontal chroma edge (dir == 1) of a 4:2:0 picture
+// (8 chroma samples per edge).
+//   image     chroma plane to filter in place
+//   strength  four edge strengths for this edge
+//   MbQ, MbP  macroblocks on the two sides of the edge; their qpc[uv]
+//             values are averaged to select Alpha, Beta and the clip row
+//   uv        chroma component index
+//   pixMB     position of the edge; pos_x and pos_y are halved to obtain
+//             the chroma sample position
+// Nothing is filtered when pixMB is unavailable while DFDisableIdc != 0,
+// or when Alpha or Beta is 0. Alpha and Beta are used unscaled and the
+// SIMD filters are called without consulting sse2_flag.
+void EdgeLoopChroma_Horiz_YUV420(VideoImage *image, const byte strength[4], Macroblock *MbQ, int uv, PixelPos pixMB, Macroblock *MbP)
+{
+  // dir == 1
+  imgpel** Img = image->img;
+
+  if (pixMB.available || (MbQ->DFDisableIdc == 0))
+  {
+    int AlphaC0Offset = MbQ->DFAlphaC0Offset;
+    int BetaOffset = MbQ->DFBetaOffset;
+
+    // Average QP of the two blocks
+    int QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1;
+
+    int indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset);
+    int indexB = iClip3(0, MAX_QP, QP + BetaOffset);
+
+    int Alpha = ALPHA_TABLE[indexA] ;
+    int Beta = BETA_TABLE [indexB] ;
+
+    if (Alpha !=0 && Beta != 0)
+    {
+      int inc_dim = image->stride;
+      imgpel *SrcPtrP = &(Img[pixMB.pos_y>>1][pixMB.pos_x>>1]);
+
+      if (strength[0] == 4) // if strong filter is used, all blocks will be strong
+      {
+        IntraStrongFilter_Chroma8_Horiz_YUV420_sse2(inc_dim, SrcPtrP, Alpha, Beta);
+      }
+      else
+      {
+        // FilterChroma8_Horiz_sse() takes the clip row as const char *;
+        // make the conversion from the byte table explicit
+        const char *ClipTab = (const char *)CLIP_TAB[indexA];
+        FilterChroma8_Horiz_sse(inc_dim, SrcPtrP, Alpha, Beta, strength, ClipTab);
+      }
+    }
+  }
+} \ No newline at end of file
diff --git a/Src/h264dec/ldecod/src/filter_chroma_vert.c b/Src/h264dec/ldecod/src/filter_chroma_vert.c
new file mode 100644
index 00000000..8c4a4c8c
--- /dev/null
+++ b/Src/h264dec/ldecod/src/filter_chroma_vert.c
@@ -0,0 +1,570 @@
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+#include "loopfilter.h"
+#include <emmintrin.h>
+static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; // alpha threshold, looked up with indexA
+static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; // beta threshold, looked up with indexB
+static const byte CLIP_TAB[52][5] = // clipping value, looked up as CLIP_TAB[indexA][strength]
+{ // NOTE(review): column 0 is written as -1 in a byte table; FilterChroma_Vert_sse reads a row through const char * and adds 1, so strength 0 gives tc0 == 0 only where plain char is signed - confirm this is intended
+ { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},
+ { -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},{ -1, 0, 0, 0, 0},
+ { -1, 0, 0, 0, 0},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 0, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 0, 1, 1, 1},{ -1, 1, 1, 1, 1},
+ { -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 1, 1},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 1, 2, 2},{ -1, 1, 2, 3, 3},
+ { -1, 1, 2, 3, 3},{ -1, 2, 2, 3, 3},{ -1, 2, 2, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 2, 3, 4, 4},{ -1, 3, 3, 5, 5},{ -1, 3, 4, 6, 6},{ -1, 3, 4, 6, 6},
+ { -1, 4, 5, 7, 7},{ -1, 4, 5, 8, 8},{ -1, 4, 6, 9, 9},{ -1, 5, 7,10,10},{ -1, 6, 8,11,11},{ -1, 6, 8,13,13},{ -1, 7,10,14,14},{ -1, 8,11,16,16},
+ { -1, 9,12,18,18},{ -1,10,13,20,20},{ -1,11,15,23,23},{ -1,13,17,25,25}
+} ;
+
+static const int pelnum_cr[2][4] = {{0,8,16,16}, {0,8, 8,16}}; //[dir:0=vert, 1=hor.][yuv_format]
+
+// Deblocks one vertical chroma edge (dir == 0) of a macroblock, one pixel row at a time.
+// Generic scalar path: thresholds and clipping are scaled by the chroma bit-depth scale
+// and the row count comes from the chroma format of picture p.
+//
+// image    - chroma plane, filtered in place
+// Strength - boundary strengths for the edge; when 8 rows are filtered only entries
+//            ((pel >> 1) << 2) + (pel & 1) are read, otherwise entry pel
+// MbQ      - current macroblock
+// edge     - x offset of the edge handed to getNeighbourX0
+// uv       - chroma component index (selects qpc[uv] and max_pel_value_comp[uv + 1])
+// p        - picture; only chroma_format_idc is read
+void EdgeLoopChromaNormal_Vert(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p)
+{
+  // dir == 0
+  imgpel **Img = image->img;
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  PixelPos pixP, pixQ;
+  Macroblock *MbP;
+  const byte *ClipTab;
+  int bitdepth_scale, max_imgpel_value;
+  int QP, indexA, indexB, Alpha, Beta;
+  int PelNum, pel;
+
+  p_Vid->getNeighbourX0(MbQ, edge - 1, p_Vid->mb_size[IS_CHROMA], &pixP);
+
+  // Skip the edge when the neighbour is unavailable, unless DFDisableIdc is 0.
+  if (!pixP.available && MbQ->DFDisableIdc != 0)
+    return;
+
+  bitdepth_scale   = p_Vid->bitdepth_scale[IS_CHROMA];
+  max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1];
+  MbP              = &(p_Vid->mb_data[pixP.mb_addr]);
+
+  // Average QP of the two blocks
+  QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1;
+
+  indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+  indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+
+  Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+  Beta  = BETA_TABLE [indexB] * bitdepth_scale;
+
+  // With a zero threshold no abs(diff) < threshold test below can pass.
+  if (Alpha == 0 || Beta == 0)
+    return;
+
+  PelNum  = pelnum_cr[0][p->chroma_format_idc];
+  ClipTab = CLIP_TAB[indexA];
+
+  p_Vid->getNeighbourX0(MbQ, edge, p_Vid->mb_size[IS_CHROMA], &pixQ);
+
+  // Walk down the edge; both positions advance one row per iteration.
+  for (pel = 0; pel < PelNum; ++pel, ++pixP.pos_y, ++pixQ.pos_y)
+  {
+    int Strng = Strength[(PelNum == 8) ? (((pel >> 1) << 2) + (pel & 0x01)) : pel];
+    imgpel *SrcPtrP, *SrcPtrQ;
+    imgpel L0, L1, R0, R1;
+
+    if (Strng == 0)
+      continue;
+
+    SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+    SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+    L0 = *SrcPtrP;
+    R0 = *SrcPtrQ;
+
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = SrcPtrQ[1];
+    if (abs(R0 - R1) >= Beta)
+      continue;
+
+    L1 = SrcPtrP[-1];
+    if (abs(L0 - L1) >= Beta)
+      continue;
+
+    if (Strng == 4) // INTRA strong filtering
+    {
+      *SrcPtrP = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+      *SrcPtrQ = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+    }
+    else
+    {
+      int tc0 = ClipTab[ Strng ] * bitdepth_scale + 1;
+      int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+
+      *SrcPtrP = (imgpel) iClip1 ( max_imgpel_value, L0 + dif);
+      *SrcPtrQ = (imgpel) iClip1 ( max_imgpel_value, R0 - dif);
+    }
+  }
+}
+
+static void FilterStrongChroma_Vert_sse(int p_step, imgpel *SrcPtrP, int Alpha, int Beta) // strong chroma filter across a vertical edge: 8 rows, handled as two stages of 4 rows
+{ // SrcPtrP: L0 sample of the first row; p_step: distance between rows in imgpel units; Alpha/Beta: thresholds. Four samples are moved per 32-bit access, so this assumes an 8-bit imgpel
+ __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1); // x > t-1 marks x >= t; inverting that mask gives x < t
+ __m64 mmx_zero = _mm_setzero_si64(), mmx_two=_mm_set1_pi16(2);
+ __m64 mmx_minus_one;
+ __m64 mmx_absdiff, mmx_diff;
+ __m64 mmx_L0, mmx_L1, mmx_L1_L0;
+ __m64 mmx_R0, mmx_R0_R1, mmx_R1;
+ __m64 mmx_load0, mmx_load1, mmx_load2, mmx_load3, mmx_load4, mmx_load5, mmx_load6, mmx_load7, mmx_load8;
+ __m64 mmx_match, mmx_L0_new, mmx_R0_new;
+ int match;
+ int i=0;
+
+ mmx_minus_one = _mm_set1_pi32(-1); // all ones, used to invert compare masks
+ SrcPtrP -= 1; // step back to L1 so that each 4-byte load covers L1 L0 R0 R1
+
+ STAGE:
+ mmx_load0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP)); // La1 La0 Ra0 Ra1 --- --- --- ---
+ mmx_load1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lb1 Lb0 Rb0 Rb1 --- --- --- ---
+ mmx_load4 = _mm_unpacklo_pi8(mmx_load0, mmx_load1); // La1 Lb1 La0 Lb0 Ra0 Rb0 Ra1 Rb1
+ mmx_load2 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lc1 Lc0 Rc0 Rc1 --- --- --- ---
+ mmx_load3 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Ld1 Ld0 Rd0 Rd1 --- --- --- ---
+ SrcPtrP+=p_step; // now at the first row of the next stage
+ mmx_load5 = _mm_unpacklo_pi8(mmx_load2, mmx_load3); // Lc1 Ld1 Lc0 Ld0 Rc0 Rd0 Rc1 Rd1
+ mmx_L1_L0 = _mm_unpacklo_pi16(mmx_load4, mmx_load5); // La1 Lb1 Lc1 Ld1 La0 Lb0 Lc0 Ld0
+ mmx_R0_R1 = _mm_unpackhi_pi16(mmx_load4, mmx_load5); // Ra0 Rb0 Rc0 Rd0 Ra1 Rb1 Rc1 Rd1
+
+ // abs( R0 - L0 ) < Alpha
+ // MMX doesn't have unsigned compare, so we have to go to short
+ mmx_L0 = _mm_unpackhi_pi8(mmx_L1_L0, mmx_zero); // La0 Lb0 Lc0 Ld0
+ mmx_R0 = _mm_unpacklo_pi8(mmx_R0_R1, mmx_zero); // Ra0 Rb0 Rc0 Rd0
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0);
+ mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); // one of the two saturating differences is zero, so the OR is the absolute difference
+ mmx_match = _mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do
+ mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // abs( R0 - R1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_R0 already populated
+ mmx_R1 = _mm_unpackhi_pi8(mmx_R0_R1, mmx_zero); // Ra1 Rb1 Rc1 Rd1
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1);
+ mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // abs(L0 - L1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_L0 already populated
+ mmx_L1 = _mm_unpacklo_pi8(mmx_L1_L0, mmx_zero); // La1 Lb1 Lc1 Ld1
+ mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1);
+ mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // All three tests passed for at least one row: compute the filtered values and blend them in under mmx_match.
+
+ // L0 = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+ mmx_L0_new = mmx_L1;
+ mmx_L0_new = _mm_slli_pi16(mmx_L0_new, 1);
+ mmx_L0_new = _mm_add_pi16(mmx_L0_new, mmx_L0);
+ mmx_L0_new = _mm_add_pi16(mmx_L0_new, mmx_R1);
+ mmx_L0_new = _mm_add_pi16(mmx_L0_new, mmx_two);
+ mmx_L0_new = _mm_srai_pi16(mmx_L0_new, 2);
+ mmx_L0_new = _mm_and_si64(mmx_L0_new, mmx_match);
+ mmx_L0 = _mm_andnot_si64(mmx_match, mmx_L0);
+ mmx_L0 = _mm_or_si64(mmx_L0, mmx_L0_new);
+
+ // R0 = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+ mmx_R0_new = mmx_R1;
+ mmx_R0_new = _mm_slli_pi16(mmx_R0_new, 1);
+ mmx_R0_new = _mm_add_pi16(mmx_R0_new, mmx_R0);
+ mmx_R0_new = _mm_add_pi16(mmx_R0_new, mmx_L1);
+ mmx_R0_new = _mm_add_pi16(mmx_R0_new, mmx_two);
+ mmx_R0_new = _mm_srai_pi16(mmx_R0_new, 2);
+ mmx_R0_new = _mm_and_si64(mmx_R0_new, mmx_match);
+ mmx_R0 = _mm_andnot_si64(mmx_match, mmx_R0);
+ mmx_R0 = _mm_or_si64(mmx_R0, mmx_R0_new);
+
+ // Write-back: rewind to the first row of this stage, then transpose the columns back into rows.
+ SrcPtrP -= 4*p_step;
+
+ // rotate 4x4 matrix
+ mmx_load1 = _mm_unpacklo_pi16(mmx_L1, mmx_R0); // 00 20 01 21
+ mmx_load3 = _mm_unpackhi_pi16(mmx_L1, mmx_R0); // 02 22 03 23
+ mmx_load2 = _mm_unpacklo_pi16(mmx_L0, mmx_R1); // 10 30 11 31
+ mmx_load4 = _mm_unpackhi_pi16(mmx_L0, mmx_R1); // 12 32 13 33
+ mmx_load5 = _mm_unpacklo_pi16(mmx_load1, mmx_load2); // 00 10 20 30
+ mmx_load6 = _mm_unpackhi_pi16(mmx_load1, mmx_load2); // 01 11 21 31
+ mmx_load7 = _mm_unpacklo_pi16(mmx_load3, mmx_load4); // 02 12 22 32
+ mmx_load8 = _mm_unpackhi_pi16(mmx_load3, mmx_load4); // 03 13 23 33
+ mmx_load5 = _mm_packs_pu16(mmx_load5, mmx_load5); // back to bytes (unsigned saturation)
+ mmx_load6 = _mm_packs_pu16(mmx_load6, mmx_load6);
+ mmx_load7 = _mm_packs_pu16(mmx_load7, mmx_load7);
+ mmx_load8 = _mm_packs_pu16(mmx_load8, mmx_load8);
+
+ // store L1 L0 R0 R1 of each of the four rows (L1 and R1 are written back unchanged)
+ *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_load5);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load6);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load7);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load8);
+
+ if (i++ == 1) // NOTE(review): no _mm_empty() on any return path - confirm the caller resets MMX state before x87 code runs
+ return;
+
+ SrcPtrP += p_step;
+ goto STAGE; // next stage
+}
+
+static void FilterChroma_Vert_sse(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const char *ClipTab) // normal (clipped) chroma filter across a vertical edge: 8 rows, handled as two stages of 4 rows
+{ // SrcPtrP: L0 sample of the first row; p_step: distance between rows in imgpel units; Strength[k] applies to rows 2k and 2k+1; ClipTab: clip row indexed by strength. Four samples are moved per 32-bit access, so this assumes an 8-bit imgpel
+ __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1); // x > t-1 marks x >= t; inverting that mask gives x < t
+ __m64 mmx_zero = _mm_setzero_si64(), mmx_four=_mm_set1_pi16(4);
+ __m64 mmx_minus_one;
+ __m64 mmx_absdiff, mmx_diff;
+ __m64 mmx_L0, mmx_L1, mmx_L1_L0;
+ __m64 mmx_R0, mmx_R0_R1, mmx_R1;
+ __m64 mmx_load0, mmx_load1, mmx_load2, mmx_load3, mmx_load4, mmx_load5, mmx_load6, mmx_load7, mmx_load8;
+ __m64 mmx_C0, mmx_negative_C0, mmx_dif, mmx_match;
+ int match;
+ int i=0;
+
+ mmx_minus_one = _mm_set1_pi32(-1); // all ones, used to invert compare masks
+ SrcPtrP -= 1; // step back to L1 so that each 4-byte load covers L1 L0 R0 R1
+
+ STAGE:
+ // skip any stage whose two strength entries are both zero
+ while (!Strength[i*2] && !Strength[i*2+1])
+ {
+ SrcPtrP += p_step*4;
+ if (i++ == 1) // last stage
+ return;
+ }
+
+ mmx_load0 = _mm_cvtsi32_si64(*(int *)(SrcPtrP)); // La1 La0 Ra0 Ra1 --- --- --- ---
+ mmx_load1 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lb1 Lb0 Rb0 Rb1 --- --- --- ---
+ mmx_load4 = _mm_unpacklo_pi8(mmx_load0, mmx_load1); // La1 Lb1 La0 Lb0 Ra0 Rb0 Ra1 Rb1
+ mmx_load2 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Lc1 Lc0 Rc0 Rc1 --- --- --- ---
+ mmx_load3 = _mm_cvtsi32_si64(*(int *)(SrcPtrP+=p_step)); // Ld1 Ld0 Rd0 Rd1 --- --- --- ---
+ SrcPtrP+=p_step; // now at the first row of the next stage
+ mmx_load5 = _mm_unpacklo_pi8(mmx_load2, mmx_load3); // Lc1 Ld1 Lc0 Ld0 Rc0 Rd0 Rc1 Rd1
+ mmx_L1_L0 = _mm_unpacklo_pi16(mmx_load4, mmx_load5); // La1 Lb1 Lc1 Ld1 La0 Lb0 Lc0 Ld0
+ mmx_R0_R1 = _mm_unpackhi_pi16(mmx_load4, mmx_load5); // Ra0 Rb0 Rc0 Rd0 Ra1 Rb1 Rc1 Rd1
+
+ // abs( R0 - L0 ) < Alpha
+ // MMX doesn't have unsigned compare, so we have to go to short
+ mmx_L0 = _mm_unpackhi_pi8(mmx_L1_L0, mmx_zero); // La0 Lb0 Lc0 Ld0
+ mmx_R0 = _mm_unpacklo_pi8(mmx_R0_R1, mmx_zero); // Ra0 Rb0 Rc0 Rd0
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0);
+ mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff); // one of the two saturating differences is zero, so the OR is the absolute difference
+ mmx_match = _mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do
+ mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // abs( R0 - R1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_R0 already populated
+ mmx_R1 = _mm_unpackhi_pi8(mmx_R0_R1, mmx_zero); // Ra1 Rb1 Rc1 Rd1
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1);
+ mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // abs(L0 - L1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_L0 already populated
+ mmx_L1 = _mm_unpacklo_pi8(mmx_L1_L0, mmx_zero); // La1 Lb1 Lc1 Ld1
+ mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1);
+ mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 1) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // All three tests passed for at least one row: compute the clipped delta and apply it under mmx_match.
+
+ // tc0 = ClipTab[ Strng ] + 1, one strength per pair of rows. NOTE(review): a strength of 0 yields tc0 == 0 only if ClipTab[0] reads as -1, i.e. plain char is signed - confirm
+ mmx_C0 = _mm_setr_pi16(ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2]]+1, ClipTab[Strength[i*2+1]]+1, ClipTab[Strength[i*2+1]]+1);
+ mmx_negative_C0 = _mm_sub_pi16(mmx_zero, mmx_C0);
+
+ // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+ mmx_dif = mmx_R0;
+ mmx_dif = _mm_sub_pi16(mmx_dif, mmx_L0);
+ mmx_dif = _mm_slli_pi16(mmx_dif, 2);
+ mmx_dif = _mm_add_pi16(mmx_dif, mmx_L1);
+ mmx_dif = _mm_sub_pi16(mmx_dif, mmx_R1);
+ mmx_dif = _mm_add_pi16(mmx_dif, mmx_four);
+ mmx_dif = _mm_srai_pi16(mmx_dif, 3);
+ mmx_dif = _mm_min_pi16(mmx_dif, mmx_C0);
+ mmx_dif = _mm_max_pi16(mmx_dif, mmx_negative_C0);
+ mmx_dif = _mm_and_si64(mmx_dif, mmx_match); // zero delta for rows that failed a test
+
+ // L0 = (imgpel) iClip1(max_imgpel_value, L0 + dif);
+ mmx_L0 = _mm_add_pi16(mmx_L0, mmx_dif);
+
+ // R0 = (imgpel) iClip1(max_imgpel_value, R0 - dif);
+ mmx_R0 = _mm_sub_pi16(mmx_R0, mmx_dif);
+
+ // Write-back: rewind to the first row of this stage, then transpose the columns back into rows.
+ SrcPtrP -= 4*p_step;
+
+ // rotate 4x4 matrix
+ mmx_load1 = _mm_unpacklo_pi16(mmx_L1, mmx_R0); // 00 20 01 21
+ mmx_load2 = _mm_unpacklo_pi16(mmx_L0, mmx_R1); // 10 30 11 31
+ mmx_load3 = _mm_unpackhi_pi16(mmx_L1, mmx_R0); // 02 22 03 23
+ mmx_load4 = _mm_unpackhi_pi16(mmx_L0, mmx_R1); // 12 32 13 33
+ mmx_load5 = _mm_unpacklo_pi16(mmx_load1, mmx_load2); // 00 10 20 30
+ mmx_load6 = _mm_unpackhi_pi16(mmx_load1, mmx_load2); // 01 11 21 31
+ mmx_load7 = _mm_unpacklo_pi16(mmx_load3, mmx_load4); // 02 12 22 32
+ mmx_load8 = _mm_unpackhi_pi16(mmx_load3, mmx_load4); // 03 13 23 33
+ mmx_load5 = _mm_packs_pu16(mmx_load5, mmx_load5); // the unsigned-saturating pack performs the clamp to 0..255
+ mmx_load6 = _mm_packs_pu16(mmx_load6, mmx_load6);
+ mmx_load7 = _mm_packs_pu16(mmx_load7, mmx_load7);
+ mmx_load8 = _mm_packs_pu16(mmx_load8, mmx_load8);
+
+ *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_load5);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load6);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load7);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load8);
+
+ if (i++ == 1) // NOTE(review): no _mm_empty() on any return path - confirm the caller resets MMX state before x87 code runs
+ return;
+
+ SrcPtrP += p_step;
+ goto STAGE; // next stage
+}
+
+// Scalar strong chroma filter across a vertical edge, applied to 8 consecutive rows.
+// On every row SrcPtrP[-1], [0], [1] and [2] are L1, L0, R0 and R1; p_step is the
+// distance between rows. A row is rewritten only if all three threshold tests pass.
+static void FilterStrongChroma_Vert_c(int p_step, imgpel *SrcPtrP, int Alpha, int Beta)
+{
+  imgpel *row = SrcPtrP;
+  int rows_left;
+
+  for (rows_left = 8; rows_left > 0; --rows_left, row += p_step)
+  {
+    imgpel L0 = row[0];
+    imgpel R0 = row[1];
+    imgpel L1, R1;
+
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = row[2];
+    if (abs(R0 - R1) >= Beta)
+      continue;
+
+    L1 = row[-1];
+    if (abs(L0 - L1) >= Beta)
+      continue;
+
+    row[0] = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+    row[1] = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+  }
+}
+
+// Scalar normal (clipped) chroma filter across a vertical edge, applied to 8 rows.
+// SrcPtrP points at L0 of the first row and p_step is the distance between rows.
+// Strength[k] governs rows 2k and 2k+1; a zero strength leaves the row untouched.
+// ClipTab is the clip row for the edge, indexed by strength. Results are clamped to 255.
+static void FilterChroma_Vert_c(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const byte *ClipTab)
+{
+  imgpel *left = SrcPtrP;
+  int row;
+
+  for (row = 0; row < 8; ++row, left += p_step)
+  {
+    const int bS = Strength[row >> 1];
+    imgpel *right = left + 1;
+    imgpel L0, L1, R0, R1;
+    int tc0, dif;
+
+    if (!bS)
+      continue;
+
+    L0 = *left;
+    R0 = *right;
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = right[1];
+    if (abs(R0 - R1) >= Beta)
+      continue;
+
+    L1 = left[-1];
+    if (abs(L0 - L1) >= Beta)
+      continue;
+
+    tc0 = ClipTab[bS] + 1;
+    dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+
+    *left  = (imgpel) iClip1 ( 255, L0 + dif);
+    *right = (imgpel) iClip1 ( 255, R0 - dif);
+  }
+}
+
+// Deblocks one vertical chroma edge (dir == 0) of a 4:2:0 macroblock with the MMX filters.
+//
+// image    - chroma plane; rows are reached through image->img, row pitch is image->stride
+// Strength - four boundary strengths for the edge; Strength[0] == 4 selects the strong filter
+// MbQ      - current macroblock (supplies the filter offsets and DFDisableIdc)
+// uv       - chroma component index into qpc[]
+// pixMB1   - neighbour position; its coordinates are halved before indexing the plane
+// MbP      - macroblock on the other side of the edge (supplies the second QP)
+void EdgeLoopChroma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, int uv, PixelPos pixMB1, Macroblock *MbP)
+{
+  // dir == 0
+  imgpel **Img = image->img;
+  int QP, indexA, indexB, Alpha, Beta;
+  imgpel *SrcPtrP;
+
+  // Skip the edge when the neighbour is unavailable, unless DFDisableIdc is 0.
+  if (!pixMB1.available && MbQ->DFDisableIdc != 0)
+    return;
+
+  // Average QP of the two blocks
+  QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1;
+
+  indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+  Alpha  = ALPHA_TABLE[indexA];
+  if (Alpha == 0)
+    return;
+
+  // Beta is only looked up once Alpha is known to be non-zero.
+  indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+  Beta   = BETA_TABLE [indexB];
+  if (Beta == 0)
+    return;
+
+  SrcPtrP = &(Img[pixMB1.pos_y >> 1][pixMB1.pos_x >> 1]);
+
+  if (Strength[0] == 4)
+    FilterStrongChroma_Vert_sse(image->stride, SrcPtrP, Alpha, Beta);
+  else
+    FilterChroma_Vert_sse(image->stride, SrcPtrP, Alpha, Beta, Strength, CLIP_TAB[indexA]);
+}
+
+// Deblocks one vertical chroma edge (dir == 0) over 8 rows using getAffNeighbour.
+// Both neighbours are looked up again for every row, so the QP-derived thresholds are
+// recomputed per row from the macroblock that row actually borders.
+//
+// image    - chroma plane, filtered in place
+// Strength - boundary strengths; indexed by pel << 1 when MbQ is a field macroblock and
+//            MbP is not, otherwise by ((pel >> 1) << 2) + (pel & 1)
+// MbQ      - current macroblock
+// edge     - x offset of the edge handed to getAffNeighbour
+// uv       - chroma component index (selects qpc[uv] and max_pel_value_comp[uv + 1])
+// p        - not referenced by this function
+void EdgeLoopChromaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p)
+{
+  // dir == 0
+  imgpel **Img = image->img;
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  const int bitdepth_scale = p_Vid->bitdepth_scale[IS_CHROMA];
+  const int max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1];
+  const int AlphaC0Offset = MbQ->DFAlphaC0Offset;
+  const int BetaOffset = MbQ->DFBetaOffset;
+  int pel;
+
+  for (pel = 0; pel < 8; ++pel)
+  {
+    PixelPos pixP, pixQ;
+    Macroblock *MbP;
+    imgpel *SrcPtrP, *SrcPtrQ;
+    imgpel L0, L1, R0, R1;
+    const byte *ClipTab;
+    int StrengthIdx, Strng;
+    int QP, indexA, indexB, Alpha, Beta;
+
+    getAffNeighbour(MbQ, edge, pel, p_Vid->mb_size[IS_CHROMA], &pixQ);
+    getAffNeighbour(MbQ, edge - 1, pel, p_Vid->mb_size[IS_CHROMA], &pixP);
+
+    // NOTE(review): MbP is taken from pixP.mb_addr and read before pixP.available is
+    // tested - confirm mb_addr is always a valid index for unavailable neighbours.
+    MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+    StrengthIdx = (MbQ->mb_field && !MbP->mb_field) ? pel << 1 : ((pel >> 1) << 2) + (pel & 0x01);
+
+    // Skip the row when the neighbour is unavailable, unless DFDisableIdc is 0.
+    if (!pixP.available && MbQ->DFDisableIdc != 0)
+      continue;
+
+    Strng = Strength[StrengthIdx];
+    if (Strng == 0)
+      continue;
+
+    SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+    SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+
+    // Average QP of the two blocks
+    QP = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1;
+
+    indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset);
+    indexB = iClip3(0, MAX_QP, QP + BetaOffset);
+
+    Alpha   = ALPHA_TABLE[indexA] * bitdepth_scale;
+    Beta    = BETA_TABLE [indexB] * bitdepth_scale;
+    ClipTab = CLIP_TAB[indexA];
+
+    L0 = SrcPtrP[0];
+    R0 = SrcPtrQ[0];
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    L1 = SrcPtrP[-1];
+    R1 = SrcPtrQ[ 1];
+    if (abs(R0 - R1) >= Beta || abs(L0 - L1) >= Beta)
+      continue;
+
+    if (Strng == 4) // INTRA strong filtering
+    {
+      SrcPtrQ[0] = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+      SrcPtrP[0] = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+    }
+    else
+    {
+      int tc0 = ClipTab[ Strng ] * bitdepth_scale + 1;
+      int dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+
+      SrcPtrP[0] = (imgpel) iClip1 ( max_imgpel_value, L0 + dif );
+      SrcPtrQ[0] = (imgpel) iClip1 ( max_imgpel_value, R0 - dif );
+    }
+  }
+}
diff --git a/Src/h264dec/ldecod/src/filter_luma_horiz.c b/Src/h264dec/ldecod/src/filter_luma_horiz.c
new file mode 100644
index 00000000..97438dfb
--- /dev/null
+++ b/Src/h264dec/ldecod/src/filter_luma_horiz.c
@@ -0,0 +1,871 @@
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+#include "loopfilter.h"
+#include <mmintrin.h>
+#include <emmintrin.h>
+
+static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; // alpha threshold table, 52 entries
+static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; // beta threshold table, 52 entries
+static const byte CLIP_TAB[52][5] = // clipping values, 52 rows of 5; presumably indexed [indexA][strength] like the chroma files - confirm against the callers
+{ // column 0 is 0 in this file
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1},
+ { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3},
+ { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6},
+ { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16},
+ { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25}
+} ;
+
+// benski> used for unit testing, not in production code
+// Returns a bit mask of the columns that pass all three threshold tests on a horizontal
+// edge: column c sets bits 2c and 2c+1. SrcPtrP / SrcPtrQ are the rows on either side of
+// the edge; inc_dim is the distance between rows.
+static int CalculateMatches(int inc_dim, const imgpel *SrcPtrP, const imgpel *SrcPtrQ, int Alpha, int Beta)
+{
+  const imgpel *rowL1 = SrcPtrP - inc_dim;
+  const imgpel *rowR1 = SrcPtrQ + inc_dim;
+  int mask = 0;
+  int col;
+
+  for (col = 0; col < BLOCK_SIZE; ++col)
+  {
+    imgpel L0 = SrcPtrP[col];
+    imgpel R0 = SrcPtrQ[col];
+    imgpel L1, R1;
+
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = rowR1[col];
+    if (abs(R0 - R1) >= Beta)
+      continue;
+
+    L1 = rowL1[col];
+    if (abs(L0 - L1) >= Beta)
+      continue;
+
+    mask |= (1 << (col*2));
+    mask |= (1 << (col*2+1));
+  }
+
+  return mask;
+}
+
+// Scalar strong (intra) luma filter across a horizontal edge, BLOCK_SIZE columns wide.
+// SrcPtrP is the row on the P side of the edge and SrcPtrQ the row on the Q side;
+// inc_dim is the distance between rows. Up to three rows on each side are rewritten.
+static void IntraStrongFilter_Luma_Horiz(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta)
+{
+  imgpel *rowL0 = SrcPtrP;
+  imgpel *rowL1 = rowL0 - inc_dim;
+  imgpel *rowL2 = rowL1 - inc_dim;
+  const imgpel *rowL3 = rowL2 - inc_dim;
+
+  imgpel *rowR0 = SrcPtrQ;
+  imgpel *rowR1 = rowR0 + inc_dim;
+  imgpel *rowR2 = rowR1 + inc_dim;
+  const imgpel *rowR3 = rowR2 + inc_dim;
+
+  int col;
+
+  for (col = 0; col < BLOCK_SIZE; ++col)
+  {
+    imgpel L0 = rowL0[col];
+    imgpel R0 = rowR0[col];
+    imgpel L1, L2, R1, R2;
+    int RL0, small_gap, ap, aq;
+
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = rowR1[col];
+    if (abs(R0 - R1) >= Beta)
+      continue;
+
+    L1 = rowL1[col];
+    if (abs(L0 - L1) >= Beta)
+      continue;
+
+    R2 = rowR2[col];
+    L2 = rowL2[col];
+
+    RL0 = L0 + R0;
+    // The longer filter is used on a side only when the step across the edge is small
+    // and that side is flat (outer sample within Beta of the edge sample).
+    small_gap = (abs(R0 - L0) < ((Alpha >> 2) + 2));
+    aq = (abs(R0 - R2) < Beta) & small_gap;
+    ap = (abs(L0 - L2) < Beta) & small_gap;
+
+    if (ap)
+    {
+      int L1RL0 = L1 + RL0;
+      imgpel L3 = rowL3[col];
+      rowL0[col] = (imgpel) (( R1 + ((L1RL0) << 1) + L2 + 4) >> 3);
+      rowL1[col] = (imgpel) (( L2 + L1RL0 + 2) >> 2);
+      rowL2[col] = (imgpel) ((((L3 + L2) << 1) + L2 + L1RL0 + 4) >> 3);
+    }
+    else
+    {
+      rowL0[col] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2);
+    }
+
+    if (aq)
+    {
+      imgpel R3 = rowR3[col];
+      rowR0[col] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3);
+      rowR1[col] = (imgpel) (( R2 + R0 + L0 + R1 + 2) >> 2);
+      rowR2[col] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + RL0 + 4) >> 3);
+    }
+    else
+    {
+      rowR0[col] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2);
+    }
+  }
+}
+
+#define LOAD_LINE_EPI16(reg, ptr) { reg = _mm_loadl_epi64((__m128i *)(ptr)); reg = _mm_unpacklo_epi8(reg, xmm_zero); } // load 8 bytes and zero-extend them to eight 16-bit lanes; needs xmm_zero in scope
+static void IntraStrongFilter_Luma_Horiz_sse2(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, __m128i xmm_alpha, __m128i xmm_beta, __m128i xmm_match) // strong luma filter across a horizontal edge for 8 columns at once; xmm_match is a per-lane mask, presumably the columns that passed the caller's threshold tests - confirm
+{ // SrcPtrP/SrcPtrQ: rows on the P and Q side of the edge; inc_dim: distance between rows; xmm_alpha/xmm_beta: thresholds as 16-bit lanes. Rows are moved 8 bytes at a time, so this assumes an 8-bit imgpel
+ __m128i xmm_zero = _mm_setzero_si128();
+ __m128i xmm_smallgap;
+ __m128i xmm_ap, xmm_aq;
+ __m128i xmm_L3, xmm_L2, xmm_L1, xmm_L0, xmm_R0, xmm_R1, xmm_R2, xmm_R3;
+ __m128i xmm_4 = _mm_set1_epi16(4), xmm_2 = _mm_set1_epi16(2);
+ __m128i xmm_add, xmm_add2, xmm_acc, xmm_match_and_an;
+ __m128i xmm_absdiff, xmm_diff;
+
+ LOAD_LINE_EPI16(xmm_L0, SrcPtrP);
+ LOAD_LINE_EPI16(xmm_R0, SrcPtrQ);
+
+ // small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2));
+ xmm_alpha = _mm_srai_epi16(xmm_alpha, 2);
+ xmm_alpha = _mm_add_epi16(xmm_alpha, xmm_2);
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0);
+ xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_R0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); // one of the two saturating differences is zero, so the OR is the absolute difference
+ xmm_smallgap = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha);
+
+ LOAD_LINE_EPI16(xmm_R2, SrcPtrQ + 2*inc_dim);
+
+ // (abs(R0 - R2) < Beta) & small_gap;
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R2);
+ xmm_absdiff =_mm_subs_epu16(xmm_R2, xmm_R0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_aq = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_aq = _mm_and_si128(xmm_aq, xmm_smallgap);
+
+ LOAD_LINE_EPI16(xmm_L2, SrcPtrP - 2*inc_dim);
+
+ // (abs(L0 - L2) < Beta) & small_gap;
+ xmm_diff=_mm_subs_epu16(xmm_L0, xmm_L2);
+ xmm_absdiff =_mm_subs_epu16(xmm_L2, xmm_L0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_ap = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_ap = _mm_and_si128(xmm_ap, xmm_smallgap);
+
+ LOAD_LINE_EPI16(xmm_L1, SrcPtrP - inc_dim);
+ LOAD_LINE_EPI16(xmm_R1, SrcPtrQ + inc_dim);
+ LOAD_LINE_EPI16(xmm_L3, SrcPtrP - 3*inc_dim);
+ LOAD_LINE_EPI16(xmm_R3, SrcPtrQ + 3*inc_dim);
+
+ xmm_match_and_an=_mm_and_si128(xmm_match, xmm_ap); // lanes that get the long P-side filter
+
+ // if(ap) SrcPtrP = (imgpel) (( R1 + ((L1 + L0 + R0) << 1) + L2 + 4) >> 3)
+ xmm_add = xmm_L1;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_slli_epi16(xmm_add, 1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_4);
+ xmm_add = _mm_srai_epi16(xmm_add, 3);
+ xmm_acc = _mm_and_si128(xmm_add, xmm_match_and_an); // kept aside; merged into the L0 row further down
+
+ // if (ap) *P_L1 = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2);
+ xmm_add = xmm_L2;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_2);
+ xmm_add = _mm_srai_epi16(xmm_add, 2);
+ xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an);
+ xmm_add2= xmm_L1;
+ xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2);
+ xmm_add=_mm_add_epi16(xmm_add, xmm_add2); // the two operands use disjoint lane masks, so the add acts as a select
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+ _mm_storel_epi64((__m128i *)(SrcPtrP-inc_dim), xmm_add);
+
+
+ // if (ap) *P_L2 = (imgpel) ((((L3 + L2) <<1) + L2 + L1 + L0 + R0 + 4) >> 3);
+ xmm_add = xmm_L3;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L2);
+ xmm_add = _mm_slli_epi16(xmm_add, 1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_4);
+ xmm_add = _mm_srai_epi16(xmm_add, 3);
+ xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an);
+ xmm_add2= xmm_L2;
+ xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2);
+ xmm_add=_mm_add_epi16(xmm_add, xmm_add2);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+ _mm_storel_epi64((__m128i *)(SrcPtrP-2*inc_dim), xmm_add);
+
+ // if (!ap) *SrcPtrP = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ;
+ xmm_add = xmm_L1;
+ xmm_add = _mm_slli_epi16(xmm_add, 1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_2);
+ xmm_add = _mm_srai_epi16(xmm_add, 2);
+ xmm_add = _mm_and_si128(xmm_add, xmm_match);
+ xmm_add = _mm_andnot_si128(xmm_ap, xmm_add);
+ xmm_add2= xmm_L0;
+ // lanes outside xmm_match keep their original L0
+ xmm_add2=_mm_andnot_si128(xmm_match, xmm_add2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_add2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_acc);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+ _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_add);
+
+ xmm_match_and_an=_mm_and_si128(xmm_match, xmm_aq); // lanes that get the long Q-side filter
+
+ // if (aq) *(SrcPtrQ ) = (imgpel) (( L1 + ((R1 + L0 + R0) << 1) + R2 + 4) >> 3);
+ xmm_add = xmm_R1;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_slli_epi16(xmm_add, 1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_4);
+ xmm_add = _mm_srai_epi16(xmm_add, 3);
+ xmm_acc = _mm_and_si128(xmm_add, xmm_match_and_an); // kept aside; merged into the R0 row further down
+
+ // if (aq) *Q_R1 = (imgpel) (( R2 + R0 + L0 + R1 + 2) >> 2);
+ xmm_add = xmm_R2;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_2);
+ xmm_add = _mm_srai_epi16(xmm_add, 2);
+ xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an);
+ xmm_add2= xmm_R1;
+ xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2);
+ xmm_add=_mm_add_epi16(xmm_add, xmm_add2);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+ _mm_storel_epi64((__m128i *)(SrcPtrQ+inc_dim), xmm_add);
+
+ // if (aq) *Q_R2 = (imgpel) ((((R3 + R2) <<1) + R2 + R1 + L0 + R0 + 4) >> 3);
+ xmm_add = xmm_R3;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R2);
+ xmm_add = _mm_slli_epi16(xmm_add, 1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_4);
+ xmm_add = _mm_srai_epi16(xmm_add, 3);
+ xmm_add = _mm_and_si128(xmm_add, xmm_match_and_an);
+ xmm_add2= xmm_R2;
+ xmm_add2= _mm_andnot_si128(xmm_match_and_an, xmm_add2);
+ xmm_add=_mm_add_epi16(xmm_add, xmm_add2);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+
+ _mm_storel_epi64((__m128i *)(SrcPtrQ+2*inc_dim), xmm_add);
+
+ // if (!aq) *SrcPtrQ = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2);
+ xmm_add = xmm_R1;
+ xmm_add = _mm_slli_epi16(xmm_add, 1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_2);
+ xmm_add = _mm_srai_epi16(xmm_add, 2);
+ xmm_add = _mm_and_si128(xmm_add, xmm_match);
+ xmm_add = _mm_andnot_si128(xmm_aq, xmm_add);
+ xmm_add2= xmm_R0;
+ // lanes outside xmm_match keep their original R0
+ xmm_add2=_mm_andnot_si128(xmm_match, xmm_add2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_add2);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_acc);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+
+ _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_add);
+
+
+}
+/*
+ * FilterLuma_Horiz_sse2
+ *
+ * Normal-strength luma deblocking across a horizontal edge, eight adjacent
+ * pixels per call, using SSE2.
+ *
+ * inc_dim    offset, in imgpel units, between neighbouring rows
+ * SrcPtrP    first L0 pixel (P side of the edge); L1 and L2 are read from
+ *            SrcPtrP - inc_dim and SrcPtrP - 2*inc_dim
+ * SrcPtrQ    first R0 pixel (Q side of the edge); R1 and R2 are read from
+ *            SrcPtrQ + inc_dim and SrcPtrQ + 2*inc_dim
+ * xmm_beta   Beta threshold, compared per 16-bit lane
+ * C0         clipping values: C0[0] is used for pixels 0..3, C0[1] for 4..7
+ * xmm_match  per-pixel 16-bit lane mask; the computed corrections are ANDed
+ *            with it, so lanes that are zero keep their pixel values.  The
+ *            Alpha test and the |R0-R1| / |L0-L1| Beta tests are not done
+ *            here; the caller has to fold them into this mask.
+ *
+ * Eight bytes are stored unconditionally to each of the L1, L0, R0 and R1
+ * rows; masked-out lanes are written back with the value that was loaded.
+ * Results are narrowed with _mm_packus_epi16 (clamp to 0..255), so this
+ * assumes imgpel is an 8-bit sample -- TODO confirm against the typedef.
+ */
+// benski> for some reason, Visual Studio 2008 only allows for 3 __m128i parameters, or else we'd pass a whole lot more for optimization reasons
+// we could put this function straight into EdgeLoopLumaNormal_Horiz_sse2 if we think it's worth it
+static void FilterLuma_Horiz_sse2(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, __m128i xmm_beta, int C0[2], __m128i xmm_match)
+{
+ __m128i xmm_zero = _mm_setzero_si128();
+ __m128i xmm_C0 = _mm_setr_epi16(C0[0], C0[0], C0[0], C0[0], C0[1], C0[1], C0[1], C0[1]); // TODO: benski> probably a better way to do this.
+ __m128i xmm_negative_C0;
+ __m128i xmm_tc0;
+ __m128i xmm_L2, xmm_L1, xmm_L0, xmm_R0, xmm_R1, xmm_R2;
+ __m128i xmm_absdiff, xmm_diff;
+ __m128i xmm_dif;
+ __m128i xmm_4 = _mm_set1_epi16(4), xmm_1 = _mm_set1_epi16(1);
+ __m128i xmm_add;
+ __m128i xmm_ap, xmm_aq;
+
+ xmm_negative_C0 = _mm_sub_epi16(xmm_zero, xmm_C0);
+ xmm_tc0 = xmm_C0; // tc0 starts at C0; aq and ap are added below
+
+ xmm_R2 = _mm_loadl_epi64((__m128i *)(SrcPtrQ + 2*inc_dim));
+ xmm_R2 = _mm_unpacklo_epi8(xmm_R2, xmm_zero); // widen 8 bytes to 8 unsigned 16-bit lanes
+
+ xmm_R0 = _mm_loadl_epi64((__m128i *)(SrcPtrQ));
+ xmm_R0 = _mm_unpacklo_epi8(xmm_R0, xmm_zero);
+
+ // (abs(R0 - R2) < Beta);
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R2); // saturating: one of the two differences is always 0
+ xmm_absdiff =_mm_subs_epu16(xmm_R2, xmm_R0);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff); // OR of the two gives |R0 - R2|
+ xmm_aq = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_add = _mm_srli_epi16(xmm_aq, 15); // convert 0xFFFF to 1 and 0x0000 to 0
+ xmm_tc0 = _mm_adds_epu16(xmm_tc0, xmm_add); // tc0 = (C0 + ap + aq) ;
+
+ xmm_L2 = _mm_loadl_epi64((__m128i *)(SrcPtrP - 2*inc_dim));
+ xmm_L2 = _mm_unpacklo_epi8(xmm_L2, xmm_zero);
+
+ xmm_L0 = _mm_loadl_epi64((__m128i *)(SrcPtrP));
+ xmm_L0 = _mm_unpacklo_epi8(xmm_L0, xmm_zero);
+
+ // (abs(L0 - L2) < Beta);
+ xmm_diff=_mm_subs_epu16(xmm_L2, xmm_L0);
+ xmm_absdiff =_mm_subs_epu16(xmm_L0, xmm_L2);
+ xmm_absdiff =_mm_or_si128(xmm_absdiff, xmm_diff);
+ xmm_ap = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_add = _mm_srli_epi16(xmm_ap, 15); // convert 0xFFFF to 1 and 0x0000 to 0
+ xmm_tc0 = _mm_adds_epu16(xmm_tc0, xmm_add); // tc0 = (C0 + ap + aq) ;
+
+ xmm_L1 = _mm_loadl_epi64((__m128i *)(SrcPtrP - inc_dim));
+ xmm_L1 = _mm_unpacklo_epi8(xmm_L1, xmm_zero);
+
+ xmm_R1 = _mm_loadl_epi64((__m128i *)(SrcPtrQ + inc_dim));
+ xmm_R1 = _mm_unpacklo_epi8(xmm_R1, xmm_zero);
+
+ // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + L1 - R1 + 4) >> 3 );
+ xmm_dif = xmm_R0;
+ xmm_dif = _mm_sub_epi16(xmm_dif, xmm_L0);
+ xmm_dif = _mm_slli_epi16(xmm_dif, 2);
+ xmm_dif = _mm_add_epi16(xmm_dif, xmm_L1);
+ xmm_dif = _mm_sub_epi16(xmm_dif, xmm_R1);
+ xmm_dif = _mm_add_epi16(xmm_dif, xmm_4);
+ xmm_dif = _mm_srai_epi16(xmm_dif, 3);
+ xmm_dif = _mm_min_epi16(xmm_dif, xmm_tc0); // upper clip bound: +tc0
+ xmm_tc0 = _mm_sub_epi16(xmm_zero, xmm_tc0); // xmm_tc0 now holds -tc0
+ xmm_dif = _mm_max_epi16(xmm_dif, xmm_tc0); // lower clip bound: -tc0
+ xmm_dif = _mm_and_si128(xmm_dif, xmm_match); // zero the delta in lanes that must not be filtered
+
+ // if( ap ) *P_L1 += iClip3( -C0, C0, (L2 + ((L0 + R0 + 1) >> 1) - (L1<<1)) >> 1 );
+ xmm_add = xmm_L0;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_1);
+ xmm_add = _mm_srai_epi16(xmm_add, 1);
+ xmm_add = _mm_sub_epi16(xmm_add, xmm_L1); // subtracting L1 twice is the "- (L1<<1)" term
+ xmm_add = _mm_sub_epi16(xmm_add, xmm_L1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L2);
+ xmm_add = _mm_srai_epi16(xmm_add, 1);
+ xmm_add = _mm_min_epi16(xmm_add, xmm_C0);
+ xmm_add = _mm_max_epi16(xmm_add, xmm_negative_C0);
+ xmm_add = _mm_and_si128(xmm_add, xmm_ap); // correction only where ap holds ...
+ xmm_add = _mm_and_si128(xmm_add, xmm_match); // ... and only in lanes selected by the caller
+ xmm_add = _mm_add_epi16(xmm_add, xmm_L1);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add); // clamp to 0..255 and narrow back to bytes
+ _mm_storel_epi64((__m128i *)(SrcPtrP-inc_dim), xmm_add);
+
+ // *SrcPtrP = (imgpel) iClip1(max_imgpel_value, L0 + dif);
+ xmm_add = _mm_add_epi16(xmm_dif, xmm_L0);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add); // the 0..255 saturation stands in for iClip1
+ _mm_storel_epi64((__m128i *)(SrcPtrP), xmm_add);
+
+ // *SrcPtrQ = (imgpel) iClip1(max_imgpel_value, R0 - dif);
+ xmm_add = _mm_sub_epi16(xmm_R0, xmm_dif);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+ _mm_storel_epi64((__m128i *)(SrcPtrQ), xmm_add);
+
+ // if (aq) *Q_R1 += iClip3( -C0, C0, (R2 + ((L0 + R0 + 1) >> 1) - (R1<<1)) >> 1 );
+ xmm_add = xmm_L0;
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R0);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_1);
+ xmm_add = _mm_srai_epi16(xmm_add, 1);
+ xmm_add = _mm_sub_epi16(xmm_add, xmm_R1);
+ xmm_add = _mm_sub_epi16(xmm_add, xmm_R1);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R2);
+ xmm_add = _mm_srai_epi16(xmm_add, 1);
+ xmm_add = _mm_min_epi16(xmm_add, xmm_C0);
+ xmm_add = _mm_max_epi16(xmm_add, xmm_negative_C0);
+ xmm_add = _mm_and_si128(xmm_add, xmm_aq);
+ xmm_add = _mm_and_si128(xmm_add, xmm_match);
+ xmm_add = _mm_add_epi16(xmm_add, xmm_R1);
+ xmm_add = _mm_packus_epi16(xmm_add, xmm_add);
+ _mm_storel_epi64((__m128i *)(SrcPtrQ+inc_dim), xmm_add);
+}
+
+/*
+ * FilterLuma_Horiz
+ *
+ * Scalar normal-strength luma deblocking across a horizontal edge for
+ * BLOCK_SIZE adjacent pixels.
+ *
+ * inc_dim           offset, in imgpel units, between neighbouring rows
+ * SrcPtrP           first L0 pixel; L1/L2 lie one/two rows before it
+ * SrcPtrQ           first R0 pixel; R1/R2 lie one/two rows after it
+ * Alpha, Beta       edge-activity thresholds; a pixel is filtered only when
+ *                   |R0-L0| < Alpha, |R0-R1| < Beta and |L0-L1| < Beta
+ * C0                clipping value for the corrections
+ * max_imgpel_value  upper bound handed to iClip1 for L0 and R0
+ */
+static void FilterLuma_Horiz(int inc_dim, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta, int C0, int max_imgpel_value)
+{
+  int x;
+
+  for (x = 0; x < BLOCK_SIZE; ++x)
+  {
+    imgpel *p0 = SrcPtrP + x;
+    imgpel *q0 = SrcPtrQ + x;
+    imgpel *p1 = p0 - inc_dim;
+    imgpel *q1 = q0 + inc_dim;
+    imgpel L0 = *p0;
+    imgpel R0 = *q0;
+    imgpel L1, L2, R1, R2;
+    int mid, ap, aq, tc0, dif;
+
+    /* the three activity tests; neighbours are only read once they are needed */
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = *q1;
+    if (abs(R0 - R1) >= Beta)
+      continue;
+
+    L1 = *p1;
+    if (abs(L0 - L1) >= Beta)
+      continue;
+
+    R2 = *(q1 + inc_dim);
+    L2 = *(p1 - inc_dim);
+
+    mid = (L0 + R0 + 1) >> 1;
+    aq  = (abs(R0 - R2) < Beta);
+    ap  = (abs(L0 - L2) < Beta);
+
+    /* each side that passes its test widens the clipping range by one */
+    tc0 = C0 + ap + aq;
+    dif = iClip3(-tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3);
+
+    if (ap)
+    {
+      *p1 += iClip3(-C0, C0, (L2 + mid - (L1 << 1)) >> 1);
+    }
+    *p0 = (imgpel) iClip1(max_imgpel_value, L0 + dif);
+    *q0 = (imgpel) iClip1(max_imgpel_value, R0 - dif);
+    if (aq)
+    {
+      *q1 += iClip3(-C0, C0, (R2 + mid - (R1 << 1)) >> 1);
+    }
+  }
+}
+
+/*
+ * CalculateMatches_sse2
+ *
+ * Evaluates the three filter-enable tests for eight pixels, working on
+ * 16-bit lanes:  |R0-L0| < Alpha,  |R0-R1| < Beta,  |L0-L1| < Beta.
+ *
+ * inc_dim     offset, in imgpel units, to the neighbouring row
+ * SrcPtrP     L0 pixels (L1 is read from SrcPtrP - inc_dim)
+ * SrcPtrQ     R0 pixels (R1 is read from SrcPtrQ + inc_dim)
+ * xmm_result  receives the lane mask (all ones where every test passed);
+ *             it is written only when at least one lane passed
+ *
+ * Returns 0 as soon as no lane can pass any more, otherwise the
+ * _mm_movemask_epi8 value of the final mask (two bits per 16-bit lane).
+ *
+ * NOTE(review): LOAD_LINE_EPI16 is defined outside this section; it
+ * presumably loads eight pixels and widens them using the local xmm_zero,
+ * which is otherwise unused here -- confirm before renaming that variable.
+ */
+/* benski> this exists for unit testing, not used in production code */
+static int CalculateMatches_sse2(int inc_dim, const imgpel *SrcPtrP, const imgpel *SrcPtrQ, int Alpha, int Beta, __m128i *xmm_result)
+{
+ int match;
+ __m128i xmm_L0, xmm_R0, xmm_R1, xmm_L1;
+ __m128i xmm_absdiff, xmm_diff, xmm_alpha, xmm_beta, xmm_zero, xmm_strength;
+
+ xmm_zero = _mm_setzero_si128();
+ xmm_alpha = _mm_set1_epi16((uint16_t)Alpha);
+ xmm_beta= _mm_set1_epi16((uint16_t)Beta);
+
+ // abs( R0 - L0 )
+ LOAD_LINE_EPI16(xmm_L0, SrcPtrP);
+ LOAD_LINE_EPI16(xmm_R0, SrcPtrQ);
+
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_L0); // saturating: one of the two differences is always 0
+ xmm_absdiff=_mm_subs_epu16(xmm_L0, xmm_R0);
+ xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff); // OR of the two gives the absolute difference
+
+ // if( abs( R0 - L0 ) < Alpha )
+ xmm_strength = _mm_cmplt_epi16(xmm_absdiff, xmm_alpha);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return 0;
+
+ // abs(R0 - R1)
+ LOAD_LINE_EPI16(xmm_R1, SrcPtrQ+inc_dim);
+ xmm_diff=_mm_subs_epu16(xmm_R0, xmm_R1);
+ xmm_absdiff=_mm_subs_epu16(xmm_R1, xmm_R0);
+ xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);
+
+ // if (abs( R0 - R1) < Beta)
+ xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff); // accumulate: a lane must pass every test
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return 0;
+
+ // abs(L0 - L1)
+ LOAD_LINE_EPI16(xmm_L1, SrcPtrP-inc_dim);
+ xmm_diff=_mm_subs_epu16(xmm_L1, xmm_L0);
+ xmm_absdiff=_mm_subs_epu16(xmm_L0, xmm_L1);
+ xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);
+
+ // if ((abs(L0 - L1) < Beta))
+ xmm_absdiff = _mm_cmplt_epi16(xmm_absdiff, xmm_beta);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_absdiff);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return 0;
+
+ *xmm_result = xmm_strength;
+ return match;
+}
+/*
+ * EdgeLoopLumaNormal_Horiz_sse2
+ *
+ * SSE2 deblocking of one horizontal luma edge, 16 pixels wide.
+ *
+ * pl        colour plane; non-zero selects the chroma QP (qpc[pl-1]) and the
+ *           chroma bit-depth scale
+ * image     plane to filter (base_address and stride are used)
+ * Strength  16 boundary-strength bytes, one per pixel.  They are read with
+ *           _mm_load_si128, so the array must be 16-byte aligned.
+ * MbQ       current macroblock (supplies p_Vid, QP, filter offsets, DFDisableIdc)
+ * edge      edge position; the P row is edge - 1 when edge < MB_BLOCK_SIZE,
+ *           row 0 otherwise
+ * p         not used by this function
+ *
+ * Steps: locate the P and Q rows through p_Vid->getNeighbour0X, derive Alpha
+ * and Beta from the averaged QP, build a per-pixel byte mask
+ * (bS != 0) & |R0-L0| < Alpha & |R0-R1| < Beta & |L0-L1| < Beta, and hand each
+ * 8-pixel half that still has selected pixels to the strong (bS == 4) or the
+ * normal filter.  Returns early as soon as the mask becomes empty.
+ *
+ * NOTE(review): the pixel maths of the normal filter clamps to 0..255, so this
+ * path looks 8-bit only even though Alpha/Beta/C0 are multiplied by
+ * bitdepth_scale -- confirm the dispatcher never selects it for deeper
+ * samples.
+ */
+void EdgeLoopLumaNormal_Horiz_sse2(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p)
+{
+ // dir == 1
+ __m128i xmm_L0, xmm_R0, xmm_R1, xmm_L1;
+ __m128i xmm_absdiff, xmm_diff, xmm_alpha, xmm_beta, xmm_comphi, xmm_complo, xmm_zero, xmm_127;
+ __m128i xmm_strength;
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ int yQ = (edge < MB_BLOCK_SIZE ? edge - 1: 0);
+ int pelmatch;
+
+ PixelPos pixMB1;
+ p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_LUMA], &pixMB1);
+
+ if (pixMB1.available || (MbQ->DFDisableIdc== 0))
+ {
+ int bitdepth_scale = pl ? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA];
+
+ Macroblock *MbP = &(p_Vid->mb_data[pixMB1.mb_addr]);
+
+ // Average QP of the two blocks
+ int QP = pl? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1;
+
+ int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+ int Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+ if (Alpha)
+ {
+ int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+ int Beta = BETA_TABLE [indexB] * bitdepth_scale;
+
+ if (Beta !=0)
+ {
+ int match;
+ PixelPos pixMB2;
+ const byte *ClipTab = CLIP_TAB [indexA];
+ int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; // not used below; the SIMD filters clamp via packus instead
+ int inc_dim = image->stride;
+ imgpel *SrcPtrQ;
+ imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x;
+
+ p_Vid->getNeighbour0X(MbQ, yQ+1, p_Vid->mb_size[IS_LUMA], &pixMB2);
+ SrcPtrQ = image->base_address + pixMB2.pos_y * image->stride + pixMB2.pos_x;
+
+ xmm_strength = _mm_load_si128((__m128i *)Strength); // aligned load: Strength must sit on a 16-byte boundary
+ xmm_127 = _mm_set1_epi8(127);
+ xmm_strength = _mm_adds_epu8(xmm_strength, xmm_127); // any non-zero bS now has its top bit set (0 becomes 127)
+ xmm_strength = _mm_srai_epi16(xmm_strength, 15); // shift so it's all 0xFFFF or 0x0000 (per byte PAIR, decided by the odd byte -- presumably fine because bS is constant over 4-pixel groups; confirm)
+
+ // abs( R0 - L0 )
+ xmm_R0 = _mm_loadu_si128((__m128i *)SrcPtrQ);
+ xmm_L0 = _mm_loadu_si128((__m128i *)SrcPtrP);
+ xmm_diff=_mm_subs_epu8(xmm_R0, xmm_L0); // saturating: one of the two differences is always 0
+ xmm_absdiff=_mm_subs_epu8(xmm_L0, xmm_R0);
+ xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);
+
+ // if( abs( R0 - L0 ) < Alpha )
+ // SSE2 doesn't have unsigned <, so we have to go to short
+ xmm_zero = _mm_setzero_si128();
+ xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero);
+ xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero);
+ xmm_alpha = _mm_set1_epi16((uint16_t)Alpha);
+ xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_alpha);
+ xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_alpha);
+ xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); // signed pack keeps 0xFFFF as 0xFF: back to one mask byte per pixel
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_complo);
+ match = _mm_movemask_epi8(xmm_strength); // one bit per pixel
+ if (match == 0)
+ return;
+
+ // abs(R0 - R1)
+ xmm_R1 = _mm_loadu_si128((__m128i *)(SrcPtrQ+inc_dim));
+ xmm_diff=_mm_subs_epu8(xmm_R0, xmm_R1);
+ xmm_absdiff=_mm_subs_epu8(xmm_R1, xmm_R0);
+ xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);
+
+ // if (abs( R0 - R1) < Beta)
+ // SSE2 doesn't have unsigned <, so we have to go to short
+ xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero);
+ xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero);
+ xmm_beta= _mm_set1_epi16((uint16_t)Beta);
+ xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta);
+ xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta);
+ xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_complo);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+ // abs(L0 - L1)
+ xmm_L1 = _mm_loadu_si128((__m128i *)(SrcPtrP-inc_dim));
+ xmm_diff=_mm_subs_epu8(xmm_L1, xmm_L0);
+ xmm_absdiff=_mm_subs_epu8(xmm_L0, xmm_L1);
+ xmm_absdiff=_mm_or_si128(xmm_absdiff, xmm_diff);
+
+ // if ((abs(L0 - L1) < Beta))
+ // SSE2 doesn't have unsigned <, so we have to go to short
+ xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero);
+ xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero);
+ xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta);
+ xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta);
+ xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi);
+ xmm_strength = _mm_and_si128(xmm_strength, xmm_complo);
+ match = _mm_movemask_epi8(xmm_strength);
+ if (match == 0)
+ return;
+
+ pelmatch = match & 0xFF; // pixels 0..7
+ if (pelmatch)
+ {
+ byte strength = Strength[0];
+
+ xmm_complo = _mm_unpacklo_epi8(xmm_strength, xmm_strength); // widen the byte mask of pixels 0..7 to 16-bit lanes
+
+ switch(strength)
+ {
+ case 4: // INTRA strong
+ {
+ assert(Strength[4] == 4);
+ IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_alpha, xmm_beta, xmm_complo);
+ }
+ break;
+ default:
+ {
+ int C[2] = { ClipTab[strength] * bitdepth_scale, ClipTab[Strength[4]] * bitdepth_scale }; // one clip value per 4-pixel group
+ FilterLuma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_beta, C, xmm_complo);
+ }
+ break;
+ }
+ }
+ pelmatch = match & 0xFF00; // pixels 8..15
+ if (pelmatch)
+ {
+ byte strength = Strength[8];
+
+ xmm_comphi = _mm_unpackhi_epi8(xmm_strength, xmm_strength); // widen the byte mask of pixels 8..15 to 16-bit lanes
+
+ switch(strength)
+ {
+ case 4: // INTRA strong
+ {
+ assert(Strength[12] == 4);
+ IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_alpha, xmm_beta, xmm_comphi);
+ }
+ break;
+ default:
+ {
+ int C[2] = { ClipTab[strength] * bitdepth_scale, ClipTab[Strength[12]] * bitdepth_scale };
+ FilterLuma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_beta, C, xmm_comphi);
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+/*
+ * EdgeLoopLuma_Horiz_YUV420
+ *
+ * SSE2 deblocking of one horizontal luma edge, 16 pixels wide, for the case
+ * where the caller has already located the P-side row and macroblock.
+ *
+ * image     plane to filter (base_address and stride are used)
+ * strength  four boundary strengths, one per group of four pixels
+ * MbQ       current macroblock (QP, filter offsets, DFDisableIdc)
+ * pixMB1    position of the first L0 pixel; the Q row is taken to be the row
+ *           directly after it (SrcPtrP + stride)
+ * MbP       macroblock on the P side of the edge (QP only)
+ *
+ * No bit-depth scaling is applied to Alpha, Beta or the clip values.
+ * A per-pixel byte mask (bS != 0) & |R0-L0| < Alpha & |R0-R1| < Beta &
+ * |L0-L1| < Beta is built, and each 8-pixel half that still has selected
+ * pixels goes to the strong (bS == 4) or the normal filter.  The function
+ * returns as soon as the mask becomes empty.
+ */
+void EdgeLoopLuma_Horiz_YUV420(VideoImage *image, const byte strength[4], Macroblock *MbQ, PixelPos pixMB1, Macroblock *MbP)
+{
+  // dir == 1
+  __m128i xmm_L0, xmm_R0, xmm_R1, xmm_L1;
+  __m128i xmm_absdiff, xmm_diff, xmm_alpha, xmm_beta, xmm_comphi, xmm_complo, xmm_zero, xmm_127;
+  __m128i xmm_strength;
+  int pelmatch;
+  int i;
+  __declspec(align(32)) uint8_t Strength[16]; // aligned so that _mm_load_si128 may be used on it
+
+  if (pixMB1.available || (MbQ->DFDisableIdc == 0))
+  {
+    // Average QP of the two blocks
+    int QP = (MbP->qp + MbQ->qp + 1) >> 1;
+
+    int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+    int Alpha = ALPHA_TABLE[indexA];
+    if (Alpha)
+    {
+      int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+      int Beta = BETA_TABLE[indexB];
+
+      if (Beta != 0)
+      {
+        int match;
+        const byte *ClipTab = CLIP_TAB[indexA];
+        int inc_dim = image->stride;
+
+        imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * inc_dim + pixMB1.pos_x;
+        imgpel *SrcPtrQ = SrcPtrP + inc_dim;
+
+        // expand the four block strengths to one byte per pixel; done only
+        // once we know the edge is actually going to be examined
+        for (i = 0; i < 16; i++)
+        {
+          Strength[i] = strength[i/4];
+        }
+
+        xmm_strength = _mm_load_si128((__m128i *)Strength);
+        xmm_127 = _mm_set1_epi8(127);
+        xmm_strength = _mm_adds_epu8(xmm_strength, xmm_127); // any non-zero bS now has its top bit set
+        xmm_strength = _mm_srai_epi16(xmm_strength, 15);     // shift so it's all 0xFFFF or 0x0000
+
+        // abs( R0 - L0 )
+        xmm_L0 = _mm_loadu_si128((__m128i *)SrcPtrP);
+        xmm_R0 = _mm_loadu_si128((__m128i *)SrcPtrQ);
+        xmm_diff = _mm_subs_epu8(xmm_R0, xmm_L0);
+        xmm_absdiff = _mm_subs_epu8(xmm_L0, xmm_R0);
+        xmm_absdiff = _mm_or_si128(xmm_absdiff, xmm_diff);
+
+        // if( abs( R0 - L0 ) < Alpha )
+        // SSE2 doesn't have unsigned <, so we have to go to short
+        xmm_zero = _mm_setzero_si128();
+        xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero);
+        xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero);
+        xmm_alpha = _mm_set1_epi16((uint16_t)Alpha);
+        xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_alpha);
+        xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_alpha);
+        xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi); // back to one mask byte per pixel
+        xmm_strength = _mm_and_si128(xmm_strength, xmm_complo);
+        match = _mm_movemask_epi8(xmm_strength);
+        if (match == 0)
+          return;
+
+        // abs(R0 - R1)
+        xmm_R1 = _mm_loadu_si128((__m128i *)(SrcPtrQ+inc_dim));
+        xmm_diff = _mm_subs_epu8(xmm_R0, xmm_R1);
+        xmm_absdiff = _mm_subs_epu8(xmm_R1, xmm_R0);
+        xmm_absdiff = _mm_or_si128(xmm_absdiff, xmm_diff);
+
+        // if (abs( R0 - R1) < Beta)
+        // SSE2 doesn't have unsigned <, so we have to go to short
+        xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero);
+        xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero);
+        xmm_beta = _mm_set1_epi16((uint16_t)Beta);
+        xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta);
+        xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta);
+        xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi);
+        xmm_strength = _mm_and_si128(xmm_strength, xmm_complo);
+        match = _mm_movemask_epi8(xmm_strength);
+        if (match == 0)
+          return;
+
+        // abs(L0 - L1)
+        xmm_L1 = _mm_loadu_si128((__m128i *)(SrcPtrP-inc_dim));
+        xmm_diff = _mm_subs_epu8(xmm_L1, xmm_L0);
+        xmm_absdiff = _mm_subs_epu8(xmm_L0, xmm_L1);
+        xmm_absdiff = _mm_or_si128(xmm_absdiff, xmm_diff);
+
+        // if ((abs(L0 - L1) < Beta))
+        // SSE2 doesn't have unsigned <, so we have to go to short
+        xmm_comphi = _mm_unpackhi_epi8(xmm_absdiff, xmm_zero);
+        xmm_complo = _mm_unpacklo_epi8(xmm_absdiff, xmm_zero);
+        xmm_comphi = _mm_cmplt_epi16(xmm_comphi, xmm_beta);
+        xmm_complo = _mm_cmplt_epi16(xmm_complo, xmm_beta);
+        xmm_complo = _mm_packs_epi16(xmm_complo, xmm_comphi);
+        xmm_strength = _mm_and_si128(xmm_strength, xmm_complo);
+        match = _mm_movemask_epi8(xmm_strength);
+        if (match == 0)
+          return;
+
+        pelmatch = match & 0xFF; // pixels 0..7
+        if (pelmatch)
+        {
+          byte bS = Strength[0];
+
+          // widen the byte mask of pixels 0..7 to 16-bit lanes
+          xmm_complo = _mm_unpacklo_epi8(xmm_strength, xmm_strength);
+
+          switch(bS)
+          {
+          case 4: // INTRA strong
+            {
+              assert(Strength[4] == 4);
+              IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_alpha, xmm_beta, xmm_complo);
+            }
+            break;
+          default:
+            {
+              int C[2] = { ClipTab[bS], ClipTab[Strength[4]] }; // one clip value per 4-pixel group
+              FilterLuma_Horiz_sse2(inc_dim, SrcPtrP, SrcPtrQ, xmm_beta, C, xmm_complo);
+            }
+            break;
+          }
+        }
+        pelmatch = match & 0xFF00; // pixels 8..15
+        if (pelmatch)
+        {
+          byte bS = Strength[8];
+
+          // widen the byte mask of pixels 8..15 to 16-bit lanes
+          xmm_comphi = _mm_unpackhi_epi8(xmm_strength, xmm_strength);
+
+          switch(bS)
+          {
+          case 4: // INTRA strong
+            {
+              assert(Strength[12] == 4);
+              IntraStrongFilter_Luma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_alpha, xmm_beta, xmm_comphi);
+            }
+            break;
+          default:
+            {
+              int C[2] = { ClipTab[bS], ClipTab[Strength[12]] };
+              FilterLuma_Horiz_sse2(inc_dim, SrcPtrP+8, SrcPtrQ+8, xmm_beta, C, xmm_comphi);
+            }
+            break;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/*
+ * EdgeLoopLumaNormal_Horiz
+ *
+ * Scalar deblocking of one horizontal edge, MB_BLOCK_SIZE pixels wide,
+ * handled in groups of BLOCK_SIZE pixels.
+ *
+ * pl        colour plane; non-zero selects the chroma QP (qpc[pl-1]) and the
+ *           chroma bit-depth scale
+ * image     plane to filter (base_address and stride are used)
+ * Strength  boundary strength per pixel; only the first entry of each group
+ *           is consulted (0 = skip, 4 = strong filter, otherwise normal)
+ * MbQ       current macroblock (supplies p_Vid, QP, filter offsets, DFDisableIdc)
+ * edge      edge position; the P row is edge - 1 when edge < MB_BLOCK_SIZE,
+ *           row 0 otherwise
+ * p         not used by this function
+ */
+void EdgeLoopLumaNormal_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p)
+{
+  // dir == 1
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  int yQ = (edge < MB_BLOCK_SIZE ? edge - 1: 0);
+  PixelPos pixMB1, pixMB2;
+  Macroblock *MbP;
+  const byte *ClipTab;
+  imgpel *SrcPtrP, *SrcPtrQ;
+  int bitdepth_scale, QP, indexA, indexB, Alpha, Beta;
+  int max_imgpel_value, inc_dim, pel;
+
+  p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_LUMA], &pixMB1);
+
+  if (!pixMB1.available && (MbQ->DFDisableIdc != 0))
+    return;
+
+  bitdepth_scale = pl ? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA];
+  MbP = &(p_Vid->mb_data[pixMB1.mb_addr]);
+
+  // Average QP of the two blocks
+  if (pl)
+    QP = (MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1;
+  else
+    QP = (MbP->qp + MbQ->qp + 1) >> 1;
+
+  indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+  Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+  if (Alpha == 0)
+    return;
+
+  indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+  Beta = BETA_TABLE[indexB] * bitdepth_scale;
+  if (Beta == 0)
+    return;
+
+  ClipTab = CLIP_TAB[indexA];
+  max_imgpel_value = p_Vid->max_pel_value_comp[pl];
+  inc_dim = image->stride;
+  SrcPtrP = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x;
+
+  // the Q row is looked up separately, one line further down
+  p_Vid->getNeighbour0X(MbQ, yQ + 1, p_Vid->mb_size[IS_LUMA], &pixMB2);
+  SrcPtrQ = image->base_address + pixMB2.pos_y * image->stride + pixMB2.pos_x;
+
+  for (pel = 0; pel < MB_BLOCK_SIZE; pel += BLOCK_SIZE)
+  {
+    byte strength = Strength[pel];
+
+    if (strength == 4) // INTRA strong
+    {
+      IntraStrongFilter_Luma_Horiz(inc_dim, SrcPtrP+pel, SrcPtrQ+pel, Alpha, Beta);
+    }
+    else if (strength != 0)
+    {
+      int C0 = ClipTab[strength] * bitdepth_scale;
+      FilterLuma_Horiz(inc_dim, SrcPtrP+pel, SrcPtrQ+pel, Alpha, Beta, C0, max_imgpel_value);
+    }
+  }
+}
diff --git a/Src/h264dec/ldecod/src/filter_luma_vert.c b/Src/h264dec/ldecod/src/filter_luma_vert.c
new file mode 100644
index 00000000..6d994217
--- /dev/null
+++ b/Src/h264dec/ldecod/src/filter_luma_vert.c
@@ -0,0 +1,554 @@
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+#include "loopfilter.h"
+#include <mmintrin.h>
+#include <emmintrin.h>
+/*
+ * Deblocking threshold tables, 52 entries each.
+ *
+ * ALPHA_TABLE  indexed by indexA = iClip3(0, MAX_QP, QP + DFAlphaC0Offset)
+ * BETA_TABLE   indexed by indexB = iClip3(0, MAX_QP, QP + DFBetaOffset)
+ * CLIP_TAB     indexed by [indexA][boundary strength 0..4]; gives the C0
+ *              clipping value used by the normal-strength filter
+ *
+ * A zero Alpha or Beta makes the edge functions in this file skip filtering.
+ * The values presumably reproduce the alpha / beta / tC0 tables of the H.264
+ * deblocking specification -- confirm against the standard before editing.
+ */
+static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ;
+static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ;
+static const byte CLIP_TAB[52][5] =
+{
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1},
+ { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3},
+ { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6},
+ { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16},
+ { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25}
+} ;
+
+/*
+ * IntraStrongFilter_Luma_Vert
+ *
+ * Scalar strong (bS == 4) luma filter across a vertical edge for BLOCK_SIZE
+ * consecutive rows.
+ *
+ * p_step       offset, in imgpel units, from one row to the next
+ * SrcPtrP      L0 pixel of the first row; L1..L3 are at offsets -1..-3
+ * SrcPtrQ      R0 pixel of the first row; R1..R3 are at offsets +1..+3
+ * Alpha, Beta  edge-activity thresholds; a row is filtered only when
+ *              |R0-L0| < Alpha, |R0-R1| < Beta and |L0-L1| < Beta
+ *
+ * On each side the three-pixel filter is used when that side is smooth
+ * (|x0 - x2| < Beta) and the step across the edge is below (Alpha >> 2) + 2;
+ * otherwise only the pixel next to the edge is modified.
+ */
+static void IntraStrongFilter_Luma_Vert(int p_step, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta)
+{
+  imgpel *pP = SrcPtrP;
+  imgpel *pQ = SrcPtrQ;
+  int row;
+
+  for (row = 0; row < BLOCK_SIZE; ++row, pP += p_step, pQ += p_step)
+  {
+    imgpel L0 = pP[0];
+    imgpel R0 = pQ[0];
+    imgpel L1, L2, R1, R2;
+    int sum0, small_gap, ap, aq;
+
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = pQ[1];
+    L1 = pP[-1];
+    if ((abs(R0 - R1) >= Beta) || (abs(L0 - L1) >= Beta))
+      continue;
+
+    R2 = pQ[2];
+    L2 = pP[-2];
+
+    sum0      = L0 + R0;
+    small_gap = (abs(R0 - L0) < ((Alpha >> 2) + 2));
+    aq        = (abs(R0 - R2) < Beta) & small_gap;
+    ap        = (abs(L0 - L2) < Beta) & small_gap;
+
+    /* P side */
+    if (ap)
+    {
+      imgpel L3 = pP[-3];
+      int sum1 = L1 + sum0;
+
+      pP[0]  = (imgpel) ((R1 + (sum1 << 1) + L2 + 4) >> 3);
+      pP[-1] = (imgpel) ((L2 + sum1 + 2) >> 2);
+      pP[-2] = (imgpel) ((((L3 + L2) << 1) + L2 + sum1 + 4) >> 3);
+    }
+    else
+    {
+      pP[0] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2);
+    }
+
+    /* Q side */
+    if (aq)
+    {
+      imgpel R3 = pQ[3];
+
+      pQ[0] = (imgpel) ((L1 + ((R1 + sum0) << 1) + R2 + 4) >> 3);
+      pQ[1] = (imgpel) ((R2 + R0 + L0 + R1 + 2) >> 2);
+      pQ[2] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + sum0 + 4) >> 3);
+    }
+    else
+    {
+      pQ[0] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2);
+    }
+  }
+}
+
+/*
+ * FilterLuma_Vert
+ *
+ * Scalar normal-strength luma deblocking across a vertical edge for
+ * BLOCK_SIZE consecutive rows.
+ *
+ * p_step            offset, in imgpel units, from one row to the next
+ * SrcPtrP           L0 pixel of the first row; L1/L2 are at offsets -1/-2
+ * SrcPtrQ           R0 pixel of the first row; R1/R2 are at offsets +1/+2
+ * Alpha, Beta       edge-activity thresholds; a row is filtered only when
+ *                   |R0-L0| < Alpha, |R0-R1| < Beta and |L0-L1| < Beta
+ * C0                clipping value for the corrections
+ * max_imgpel_value  upper bound handed to iClip1 for L0 and R0
+ */
+static void FilterLuma_Vert(int p_step, imgpel *SrcPtrP, imgpel *SrcPtrQ, int Alpha, int Beta, int C0, int max_imgpel_value)
+{
+  imgpel *pP = SrcPtrP;
+  imgpel *pQ = SrcPtrQ;
+  int row;
+
+  for (row = 0; row < BLOCK_SIZE; ++row, pP += p_step, pQ += p_step)
+  {
+    imgpel L0 = pP[0];
+    imgpel R0 = pQ[0];
+    imgpel L1, L2, R1, R2;
+    int mid, ap, aq, tc0, dif;
+
+    /* the three activity tests; neighbours are only read once they are needed */
+    if (abs(R0 - L0) >= Alpha)
+      continue;
+
+    R1 = pQ[1];
+    if (abs(R0 - R1) >= Beta)
+      continue;
+
+    L1 = pP[-1];
+    if (abs(L0 - L1) >= Beta)
+      continue;
+
+    R2 = pQ[2];
+    L2 = pP[-2];
+
+    mid = (L0 + R0 + 1) >> 1;
+    aq  = (abs(R0 - R2) < Beta);
+    ap  = (abs(L0 - L2) < Beta);
+
+    /* each side that passes its test widens the clipping range by one */
+    tc0 = C0 + ap + aq;
+    dif = iClip3(-tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3);
+
+    if (ap)
+    {
+      pP[-1] += iClip3(-C0, C0, (L2 + mid - (L1 << 1)) >> 1);
+    }
+    pP[0] = (imgpel) iClip1(max_imgpel_value, L0 + dif);
+    pQ[0] = (imgpel) iClip1(max_imgpel_value, R0 - dif);
+    if (aq)
+    {
+      pQ[1] += iClip3(-C0, C0, (R2 + mid - (R1 << 1)) >> 1);
+    }
+  }
+}
+
+/*
+ * EdgeLoopLumaNormal_Vert
+ *
+ * Scalar deblocking of one vertical edge, MB_BLOCK_SIZE rows tall, handled
+ * in groups of BLOCK_SIZE rows.
+ *
+ * pl        colour plane; non-zero selects the chroma QP (qpc[pl-1]) and the
+ *           chroma bit-depth scale
+ * image     plane to filter (base_address and stride are used)
+ * Strength  boundary strength per row; only the first entry of each group
+ *           is consulted (0 = skip, 4 = strong filter, otherwise normal)
+ * MbQ       current macroblock (supplies p_Vid, QP, filter offsets, DFDisableIdc)
+ * edge      column of the edge inside the macroblock; the P column is edge - 1
+ * p         not used by this function
+ */
+void EdgeLoopLumaNormal_Vert(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p)
+{
+  // dir == 0
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  int xQ = edge - 1;
+
+  PixelPos pixMB1;
+  p_Vid->getNeighbourX0(MbQ, xQ, p_Vid->mb_size[IS_LUMA], &pixMB1);
+
+  if (pixMB1.available || (MbQ->DFDisableIdc == 0))
+  {
+    int bitdepth_scale = pl ? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA];
+    ptrdiff_t p_step = image->stride;
+
+    Macroblock *MbP = &(p_Vid->mb_data[pixMB1.mb_addr]);
+
+    // Average QP of the two blocks
+    int QP = pl ? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : ((MbP->qp + MbQ->qp + 1) >> 1);
+
+    int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+    int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+
+    int Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+    int Beta  = BETA_TABLE [indexB] * bitdepth_scale;
+
+    if (Alpha != 0 && Beta != 0)
+    {
+      PixelPos pixMB2;
+      const byte *ClipTab = CLIP_TAB[indexA];
+      int max_imgpel_value = p_Vid->max_pel_value_comp[pl];
+      int pel;
+      imgpel *SrcPtrQ;
+      imgpel *SrcPtrP = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x;
+
+      // the Q column is looked up separately, one pixel to the right
+      p_Vid->getNeighbourX0(MbQ, xQ + 1, p_Vid->mb_size[IS_LUMA], &pixMB2);
+      SrcPtrQ = image->base_address + pixMB2.pos_y * image->stride + pixMB2.pos_x;
+
+      for (pel = 0; pel < MB_BLOCK_SIZE; pel += BLOCK_SIZE)
+      {
+        byte strength = Strength[pel];
+
+        switch (strength)
+        {
+        case 0:
+          break;
+        case 4: // INTRA strong
+          {
+            IntraStrongFilter_Luma_Vert(p_step, SrcPtrP, SrcPtrQ, Alpha, Beta);
+          }
+          break;
+        default:
+          {
+            int C0 = ClipTab[strength] * bitdepth_scale;
+            FilterLuma_Vert(p_step, SrcPtrP, SrcPtrQ, Alpha, Beta, C0, max_imgpel_value);
+          }
+          break;
+        }
+        // advance both sides to the next group of BLOCK_SIZE rows
+        SrcPtrP += p_step * BLOCK_SIZE;
+        SrcPtrQ += p_step * BLOCK_SIZE;
+      }
+    }
+  }
+}
+
+/*
+ * FilterLuma_Vert_sse2
+ *
+ * Normal-strength luma deblocking of a vertical edge, processed as up to
+ * four stages of four rows each.  Despite the name, the work is done in
+ * 64-bit MMX registers (__m64).
+ *
+ * p_step    offset, in imgpel units, from one row to the next
+ * SrcPtrP   L0 pixel of the first row.  The pointer is moved back by two so
+ *           that one 8-byte load per row yields L2 L1 L0 R0 R1 R2 plus two
+ *           bytes that are ignored; R0 is therefore taken to be SrcPtrP[1].
+ * Alpha     threshold for |R0 - L0|
+ * Beta      threshold for |R0 - R1|, |L0 - L1|, |L0 - L2| and |R0 - R2|
+ * Strength  one boundary strength per stage; a stage whose entry is 0 is
+ *           skipped, otherwise the entry indexes ClipTab
+ * ClipTab   clip values, indexed by boundary strength
+ *
+ * Per stage the four rows are transposed into per-tap vectors (one 16-bit
+ * lane per row), the enable tests are evaluated, and only L1 L0 R0 R1 are
+ * written back as one 4-byte store per row; L2 and R2 are never modified.
+ * Rows that fail a test are written back unchanged.  A stage in which no row
+ * passes is abandoned without any store.
+ *
+ * Rows are accessed through __m64 / int pointer casts at arbitrary
+ * addresses, and results are clamped to 0..255 by _mm_packs_pu16, so this
+ * assumes 8-bit imgpel samples -- TODO confirm.
+ *
+ * NOTE(review): no _mm_empty() is issued on any return path; presumably a
+ * caller clears the MMX state before floating-point code runs -- confirm.
+ */
+static void FilterLuma_Vert_sse2(int p_step, imgpel *SrcPtrP, int Alpha, int Beta, const uint8_t Strength[4], const byte *ClipTab)
+{
+
+ __m64 mmx_alpha_minus_one = _mm_set1_pi16(Alpha-1), mmx_beta_minus_one = _mm_set1_pi16(Beta-1); // "x < T" is evaluated as !(x > T-1), since only a greater-than compare is used
+ __m64 mmx_zero = _mm_setzero_si64(), mmx_one, mmx_four=_mm_set1_pi16(4);
+ __m64 mmx_minus_one;
+ __m64 mmx_absdiff, mmx_diff;
+ __m64 mmx_L0, mmx_L1, mmx_L2, mmx_L0_R0;
+ __m64 mmx_R0, mmx_R1_R2, mmx_R1, mmx_R2;
+ __m64 mmx_load0, mmx_load1, mmx_load2, mmx_load3, mmx_load4, mmx_load5, mmx_load6, mmx_load7, mmx_load8;
+ __m64 mmx_ap, mmx_aq, mmx_C0, mmx_negative_C0, mmx_tc0, mmx_dif, mmx_acc, mmx_match;
+ int match;
+ int i=0; // index of the current 4-row stage (0..3)
+
+ mmx_minus_one = _mm_set1_pi32(-1);
+ mmx_one = _mm_sub_pi16(mmx_zero, mmx_minus_one); // dunno if this'll be faster than _mm_set1_pi16 or not
+ SrcPtrP -= 2; // step back from L0 to L2 so a single load covers all six taps
+
+ STAGE:
+
+ while (!Strength[i]) // skip stages that need no filtering
+ {
+ SrcPtrP += p_step << 2;
+ if (i++ == 3) // last stage
+ return;
+ }
+
+ mmx_load0 = (*(__m64 *)(SrcPtrP)); // La2 La1 La0 Ra0 Ra1 Ra2 --- ---
+ mmx_load1 = (*(__m64 *)(SrcPtrP+=p_step)); // Lb2 Lb1 Lb0 Rb0 Rb1 Rb2 --- ---
+ mmx_load4 = _mm_unpacklo_pi8(mmx_load0, mmx_load1); // La2 Lb2 La1 Lb1 La0 Lb0 Ra0 Rb0 *
+ mmx_load2 = (*(__m64 *)(SrcPtrP+=p_step)); // Lc2 Lc1 Lc0 Rc0 Rc1 Rc2 --- ---
+ mmx_load3 = (*(__m64 *)(SrcPtrP+=p_step)); // Ld2 Ld1 Ld0 Rd0 Rd1 Rd2 --- ---
+ SrcPtrP+=p_step; // SrcPtrP now points at the first row of the next stage
+ mmx_load5 = _mm_unpacklo_pi8(mmx_load2, mmx_load3); // Lc2 Ld2 Lc1 Ld1 Lc0 Ld0 Rc0 Rd0 *
+ mmx_L0_R0 = _mm_unpackhi_pi16(mmx_load4, mmx_load5); // La0 Lb0 Lc0 Ld0 Ra0 Rb0 Rc0 Rd0
+
+ // abs( R0 - L0 ) < Alpha
+ // MMX doesn't have unsigned compare, so we have to go to short
+ mmx_L0 = _mm_unpacklo_pi8(mmx_L0_R0, mmx_zero); // La0 Lb0 Lc0 Ld0
+ mmx_R0 = _mm_unpackhi_pi8(mmx_L0_R0, mmx_zero); // Ra0 Rb0 Rc0 Rd0
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_L0); // saturating: one of the two differences is always 0
+ mmx_absdiff =_mm_subs_pu16(mmx_L0, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_match = _mm_cmpgt_pi16(mmx_absdiff, mmx_alpha_minus_one); // 1's in any words we don't have to do
+ mmx_match = _mm_xor_si64(mmx_match, mmx_minus_one); // invert: 1's in the rows that pass the test
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 3) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ mmx_load6 = _mm_unpackhi_pi8(mmx_load0, mmx_load1); // Ra1 Rb1 Ra2 Rb2 --- --- --- --- *
+ mmx_load7 = _mm_unpackhi_pi8(mmx_load2, mmx_load3); // Rc1 Rd1 Rc2 Rd2 --- --- --- --- *
+ mmx_R1_R2 = _mm_unpacklo_pi16(mmx_load6, mmx_load7); // Ra1 Rb1 Rc1 Rd1 Ra2 Rb2 Rc2 Rd2
+
+ // abs( R0 - R1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_R0 already populated
+ mmx_R1 = _mm_unpacklo_pi8(mmx_R1_R2, mmx_zero); // Ra1 Rb1 Rc1 Rd1
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R1);
+ mmx_absdiff =_mm_subs_pu16(mmx_R1, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff); // accumulate: a row must pass every test
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 3) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // abs(L0 - L1) < Beta
+ // MMX doesn't have unsigned compare, so we have to go to short
+ // mmx_L0 already populated
+ // mmx_load4: La2 Lb2 La1 Lb1 La0 Lb0 --- ---
+ // mmx_load5: Lc2 Ld2 Lc1 Ld1 Lc0 Ld0 --- ---
+ mmx_load4 = _mm_unpacklo_pi16(mmx_load4, mmx_load5); // La2 Lb2 Lc2 Ld2 La1 Lb1 Lc1 Ld1
+ mmx_L1 = _mm_unpackhi_pi8(mmx_load4, mmx_zero); // La1 Lb1 Lc1 Ld1
+ mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L1);
+ mmx_absdiff =_mm_subs_pu16(mmx_L1, mmx_L0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_absdiff = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one);
+ mmx_absdiff = _mm_xor_si64(mmx_absdiff, mmx_minus_one);
+ mmx_match = _mm_and_si64(mmx_match, mmx_absdiff);
+ match = _mm_movemask_pi8(mmx_match);
+ if (match == 0)
+ {
+ if (i++ == 3) // last stage
+ return;
+
+ goto STAGE; // start the process over from next position
+ }
+
+ // ok, now time to perform the actual calculation. hope it was worth it!!
+
+ // ap = (abs(L0 - L2) < Beta);
+ // finish loading L2
+ mmx_L2 = _mm_unpacklo_pi8(mmx_load4, mmx_zero); // La2 Lb2 Lc2 Ld2
+ mmx_diff=_mm_subs_pu16(mmx_L0, mmx_L2);
+ mmx_absdiff =_mm_subs_pu16(mmx_L2, mmx_L0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_ap = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); // NOTE: inverted sense -- 0xFFFF where ap is FALSE
+ mmx_tc0 = _mm_add_pi16(mmx_ap, mmx_one); // a clever trick: the compare gives 0xFFFF (-1) where the test fails, so adding one yields 1 where ap holds and 0 where it does not
+
+ // aq = (abs(R0 - R2) < Beta);
+ // finish loading R2
+ // mmx_R1_R2: Ra1 Rb1 Rc1 Rd1 Ra2 Rb2 Rc2 Rd2
+ mmx_R2 = _mm_unpackhi_pi8(mmx_R1_R2, mmx_zero); // Ra2 Rb2 Rc2 Rd2
+ mmx_diff=_mm_subs_pu16(mmx_R0, mmx_R2);
+ mmx_absdiff =_mm_subs_pu16(mmx_R2, mmx_R0);
+ mmx_absdiff =_mm_or_si64(mmx_absdiff, mmx_diff);
+ mmx_aq = _mm_cmpgt_pi16(mmx_absdiff, mmx_beta_minus_one); // NOTE: inverted sense -- 0xFFFF where aq is FALSE
+ mmx_tc0 = _mm_add_pi16(mmx_tc0, _mm_add_pi16(mmx_aq, mmx_one)); // same trick as for ap: adds 1 where aq holds, 0 otherwise
+
+ // tc0 = (C0 + ap + aq) ;
+ mmx_C0 = _mm_set1_pi16(ClipTab[Strength[i]]);
+ mmx_negative_C0 = _mm_sub_pi16(mmx_zero, mmx_C0);
+ mmx_tc0 = _mm_add_pi16(mmx_tc0, mmx_C0);
+
+
+ // dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+ mmx_dif = mmx_R0;
+ mmx_dif = _mm_sub_pi16(mmx_dif, mmx_L0);
+ mmx_dif = _mm_slli_pi16(mmx_dif, 2);
+ mmx_dif = _mm_add_pi16(mmx_dif, mmx_L1);
+ mmx_dif = _mm_sub_pi16(mmx_dif, mmx_R1);
+ mmx_dif = _mm_add_pi16(mmx_dif, mmx_four);
+ mmx_dif = _mm_srai_pi16(mmx_dif, 3);
+ mmx_dif = _mm_min_pi16(mmx_dif, mmx_tc0); // upper clip bound: +tc0
+ mmx_tc0 = _mm_sub_pi16(mmx_zero, mmx_tc0); // mmx_tc0 now holds -tc0
+ mmx_dif = _mm_max_pi16(mmx_dif, mmx_tc0); // lower clip bound: -tc0
+ mmx_dif = _mm_and_si64(mmx_dif, mmx_match); // zero the delta in rows that must not be filtered
+
+ // TODO: benski> is it worth checking for_mm_movemask_pi8(ap) to see if we can skip this?
+ // if( ap ) L1 += iClip3( -C0, C0, (L2 + ((L0 + R0 + 1) >> 1) - (L1<<1)) >> 1 );
+ mmx_acc = mmx_L0;
+ mmx_acc = _mm_add_pi16(mmx_acc, mmx_R0);
+ mmx_acc = _mm_add_pi16(mmx_acc, mmx_one);
+ mmx_acc = _mm_srai_pi16(mmx_acc, 1);
+ mmx_acc = _mm_sub_pi16(mmx_acc, mmx_L1); // subtracting L1 twice is the "- (L1<<1)" term
+ mmx_acc = _mm_sub_pi16(mmx_acc, mmx_L1);
+ mmx_acc = _mm_add_pi16(mmx_acc, mmx_L2);
+ mmx_acc = _mm_srai_pi16(mmx_acc, 1);
+ mmx_acc = _mm_min_pi16(mmx_acc, mmx_C0);
+ mmx_acc = _mm_max_pi16(mmx_acc, mmx_negative_C0);
+ mmx_acc = _mm_andnot_si64(mmx_ap, mmx_acc); // andnot because mmx_ap is inverted: keeps the rows where ap holds
+ mmx_acc = _mm_and_si64(mmx_acc, mmx_match);
+ mmx_L1 = _mm_add_pi16(mmx_L1, mmx_acc);
+
+
+ //if( aq ) R1 += iClip3( -C0, C0, (R2 + RL0 - (R1<<1)) >> 1 );
+ mmx_acc = mmx_L0;
+ mmx_acc = _mm_add_pi16(mmx_acc, mmx_R0);
+ mmx_acc = _mm_add_pi16(mmx_acc, mmx_one);
+ mmx_acc = _mm_srai_pi16(mmx_acc, 1);
+ mmx_acc = _mm_sub_pi16(mmx_acc, mmx_R1);
+ mmx_acc = _mm_sub_pi16(mmx_acc, mmx_R1);
+ mmx_acc = _mm_add_pi16(mmx_acc, mmx_R2);
+ mmx_acc = _mm_srai_pi16(mmx_acc, 1);
+ mmx_acc = _mm_min_pi16(mmx_acc, mmx_C0);
+ mmx_acc = _mm_max_pi16(mmx_acc, mmx_negative_C0);
+ mmx_acc = _mm_andnot_si64(mmx_aq, mmx_acc); // andnot because mmx_aq is inverted: keeps the rows where aq holds
+ mmx_acc = _mm_and_si64(mmx_acc, mmx_match);
+ mmx_R1 = _mm_add_pi16(mmx_R1, mmx_acc);
+
+ // L0 = (imgpel) iClip1(max_imgpel_value, L0 + dif);
+ mmx_L0 = _mm_add_pi16(mmx_L0, mmx_dif);
+
+ // R0 = (imgpel) iClip1(max_imgpel_value, R0 - dif);
+ mmx_R0 = _mm_sub_pi16(mmx_R0, mmx_dif);
+
+
+ // now for the super-exciting fun of getting this data back into memory
+ SrcPtrP -= 4*p_step; // back to the first row of this stage
+ //SrcPtrQ -= 4*p_step;
+ SrcPtrP++; // from L2 to L1, the first of the four bytes written per row
+
+ // rotate 4x4 matrix
+ mmx_load1 = _mm_unpacklo_pi16(mmx_L1, mmx_R0); // 00 20 01 21
+ mmx_load2 = _mm_unpacklo_pi16(mmx_L0, mmx_R1); // 10 30 11 31
+ mmx_load3 = _mm_unpackhi_pi16(mmx_L1, mmx_R0); // 02 22 03 23
+ mmx_load4 = _mm_unpackhi_pi16(mmx_L0, mmx_R1); // 12 32 13 33
+ mmx_load5 = _mm_unpacklo_pi16(mmx_load1, mmx_load2); // 00 10 20 30
+ mmx_load6 = _mm_unpackhi_pi16(mmx_load1, mmx_load2); // 01 11 21 31
+ mmx_load7 = _mm_unpacklo_pi16(mmx_load3, mmx_load4); // 02 12 22 32
+ mmx_load8 = _mm_unpackhi_pi16(mmx_load3, mmx_load4); // 03 13 23 33
+ mmx_load5 = _mm_packs_pu16(mmx_load5, mmx_load5); // clamp to 0..255 and narrow: L1 L0 R0 R1 of the first row
+ mmx_load6 = _mm_packs_pu16(mmx_load6, mmx_load6);
+ mmx_load7 = _mm_packs_pu16(mmx_load7, mmx_load7);
+ mmx_load8 = _mm_packs_pu16(mmx_load8, mmx_load8);
+
+ //mmx_load1 = _mm_setr_pi16(0x8080, 0x80, 0, 0);
+ *(int *)SrcPtrP = _mm_cvtsi64_si32(mmx_load5); // 4-byte store of L1 L0 R0 R1
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load6);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load7);
+ *(int *)(SrcPtrP+=p_step) = _mm_cvtsi64_si32(mmx_load8);
+
+ if (i++ == 3)
+ return;
+
+ //SrcPtrQ += 2;
+ SrcPtrP += p_step; // first row of the next stage ...
+ //SrcPtrQ += p_step;
+ SrcPtrP--; // ... and back from L1 to L2
+ goto STAGE; // next stage
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Deblocks one vertical luma edge (dir == 0) starting at the sample
+ *    addressed by pixMB1 inside image.
+ *
+ *    Assumptions carried over from the original note: YUV 420 and
+ *    getNonAffNeighbour.
+ *
+ *    Nothing is filtered when MbQ->DFDisableIdc is non-zero or when the
+ *    Alpha or Beta threshold looked up for the edge is 0. Alpha and Beta
+ *    are taken from ALPHA_TABLE / BETA_TABLE as they are; no bit-depth
+ *    scaling is applied in this function.
+ *
+ * \param image
+ *    picture plane; base_address and stride are read
+ * \param Strength
+ *    filter strengths; Strength[0] == 4 selects the strong filter for
+ *    the whole edge, otherwise the array is handed to FilterLuma_Vert_sse2
+ * \param MbQ
+ *    macroblock whose DFDisableIdc, DFAlphaC0Offset, DFBetaOffset and qp
+ *    are read
+ * \param pixMB1
+ *    pos_x / pos_y of the first P-side sample (the Q side is one sample
+ *    to the right)
+ * \param MbP
+ *    second macroblock of the edge; only its qp is read
+ ************************************************************************
+ */
+void EdgeLoopLuma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, PixelPos pixMB1, Macroblock *MbP)
+{
+  ptrdiff_t stride;
+  int avg_qp, idx_alpha, idx_beta;
+  int alpha, beta;
+  imgpel *left;
+
+  // only DFDisableIdc == 0 is handled here
+  if (MbQ->DFDisableIdc != 0)
+    return;
+
+  stride = image->stride;
+
+  // rounded mean of the QPs of both macroblocks
+  avg_qp = (MbP->qp + MbQ->qp + 1) >> 1;
+
+  idx_alpha = iClip3(0, MAX_QP, avg_qp + MbQ->DFAlphaC0Offset);
+  idx_beta  = iClip3(0, MAX_QP, avg_qp + MbQ->DFBetaOffset);
+
+  alpha = ALPHA_TABLE[idx_alpha];
+  beta  = BETA_TABLE [idx_beta];
+
+  // the edge is skipped as soon as one of the thresholds is zero
+  if (alpha == 0 || beta == 0)
+    return;
+
+  left = image->base_address + pixMB1.pos_y * image->stride + pixMB1.pos_x;
+
+  if (Strength[0] != 4)
+  {
+    // normal filter: handled by a single call
+    const byte *clip = CLIP_TAB [idx_alpha];
+    FilterLuma_Vert_sse2(stride, left, alpha, beta, Strength, clip);
+    return;
+  }
+
+  // if strong filter is used, all blocks will be strong
+  {
+    imgpel *right = left + 1;
+    int blk;
+
+    for (blk = 0; blk < BLOCK_SIZE; ++blk)
+    {
+      IntraStrongFilter_Luma_Vert(stride, left, right, alpha, beta);
+      left  += stride * BLOCK_SIZE;
+      right += stride * BLOCK_SIZE;
+    }
+  }
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Filters one vertical luma edge (dir == 0) of macroblock MbQ, one
+ *    sample row at a time. The sample positions on both sides of the
+ *    edge are looked up with getAffNeighbourXPLuma() for every row and
+ *    the picture is modified in place.
+ *
+ * \param image
+ *    picture plane; image->img is used as row pointers ([y][x])
+ * \param Strength
+ *    filter strength per sample row: 0 skips the row, 4 selects the
+ *    strong filter, any other value the normal (clipped) filter
+ * \param MbQ
+ *    macroblock supplying qp, the deblocking offsets, DFDisableIdc and
+ *    p_Vid
+ * \param edge
+ *    x coordinate handed to getAffNeighbourXPLuma() as xQ
+ * \param p
+ *    NOTE(review): not referenced inside this function
+ ************************************************************************
+ */
+void EdgeLoopLumaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p)
+{
+ // dir == 0
+ imgpel **Img = image->img; // row pointers, indexed [pos_y][pos_x] below
+ int width = image->stride; // NOTE(review): assigned but never read in this function
+ int pel, ap = 0, aq = 0, Strng ;
+
+ int C0, tc0, dif;
+ imgpel L0, R0;
+ int Alpha = 0, Beta = 0 ;
+ const byte* ClipTab = NULL;
+ int small_gap;
+ int indexA, indexB;
+
+ int QP;
+ int xQ, yQ;
+
+ PixelPos pixP, pixQ;
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ int bitdepth_scale = p_Vid->bitdepth_scale[IS_LUMA]; // multiplies Alpha, Beta and C0 below
+ int max_imgpel_value = p_Vid->max_pel_value_comp[PLANE_Y]; // upper clip bound for L0/R0 in the normal filter
+
+ int AlphaC0Offset = MbQ->DFAlphaC0Offset;
+ int BetaOffset = MbQ->DFBetaOffset;
+
+ Macroblock *MbP;
+ imgpel *SrcPtrP, *SrcPtrQ;
+
+ for( pel = 0 ; pel < MB_BLOCK_SIZE ; ++pel ) // one sample row of the edge per iteration
+ {
+ xQ = edge;
+ yQ = pel;
+ getAffNeighbourXPLuma(MbQ, xQ - 1, yQ, &pixP); // position of the sample left of the edge (P side)
+
+ if (pixP.available || (MbQ->DFDisableIdc== 0)) // row is processed if P is available or DFDisableIdc is 0
+ {
+ if( (Strng = Strength[pel]) != 0) // strength 0 leaves the row untouched
+ {
+ getAffNeighbourXPLuma(MbQ, xQ, yQ, &pixQ); // TODO: PP
+
+ MbP = &(p_Vid->mb_data[pixP.mb_addr]); // macroblock that contains the P-side sample
+
+ SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+ SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+
+ // Average QP of the two blocks
+ QP = (MbP->qp + MbQ->qp + 1) >> 1;
+
+ indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset);
+ indexB = iClip3(0, MAX_QP, QP + BetaOffset);
+
+ Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+ Beta = BETA_TABLE [indexB] * bitdepth_scale;
+ ClipTab = CLIP_TAB[indexA];
+
+ L0 = SrcPtrP[0] ; // sample directly left of the edge
+ R0 = SrcPtrQ[0] ; // sample directly right of the edge
+
+ if( abs( R0 - L0 ) < Alpha ) // step across the edge must stay below Alpha
+ {
+ imgpel L1 = SrcPtrP[-1]; // P side is indexed with negative offsets (away from the edge)
+ imgpel R1 = SrcPtrQ[ 1]; // Q side is indexed with positive offsets
+ if ((abs( R0 - R1) < Beta ) && (abs(L0 - L1) < Beta )) // both sides must be smooth relative to Beta
+ {
+ imgpel L2 = SrcPtrP[-2];
+ imgpel R2 = SrcPtrQ[ 2];
+ if(Strng == 4 ) // INTRA strong filtering
+ {
+ int RL0 = L0 + R0;
+ small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2));
+ aq = ( abs( R0 - R2) < Beta ) & small_gap; // Q side gets the three-sample update only if this holds
+ ap = ( abs( L0 - L2) < Beta ) & small_gap; // same decision for the P side
+
+ if (ap)
+ {
+ imgpel L3 = SrcPtrP[-3];
+ SrcPtrP[-2] = (imgpel) ((((L3 + L2) << 1) + L2 + L1 + RL0 + 4) >> 3);
+ SrcPtrP[-1 ] = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2);
+ SrcPtrP[ 0 ] = (imgpel) (( R1 + ((L1 + RL0) << 1) + L2 + 4) >> 3);
+ }
+ else
+ {
+ SrcPtrP[ 0 ] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ; // only the sample next to the edge changes
+ }
+
+ if (aq)
+ {
+ imgpel R3 = SrcPtrQ[ 3];
+ SrcPtrQ[ 0 ] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3);
+ SrcPtrQ[ 1 ] = (imgpel) (( R2 + R0 + R1 + L0 + 2) >> 2);
+ SrcPtrQ[ 2 ] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + RL0 + 4) >> 3);
+ }
+ else
+ {
+ SrcPtrQ[ 0 ] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2); // only the sample next to the edge changes
+ }
+ }
+ else // normal filtering
+ {
+ int RL0 = (L0 + R0 + 1) >> 1; // rounded mean of the two edge samples
+ aq = (abs( R0 - R2) < Beta);
+ ap = (abs( L0 - L2) < Beta);
+
+ C0 = ClipTab[ Strng ] * bitdepth_scale;
+ tc0 = (C0 + ap + aq) ; // clip range grows by one for each side that passes its test
+ dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3) ;
+
+ if( ap )
+ *(SrcPtrP - 1) += iClip3( -C0, C0, ( L2 + RL0 - (L1 << 1)) >> 1 ) ; // adjusts L1 in place using the unfiltered L0/R0
+
+ *SrcPtrP = (imgpel) iClip1 (max_imgpel_value, L0 + dif) ;
+ *SrcPtrQ = (imgpel) iClip1 (max_imgpel_value, R0 - dif) ;
+
+ if( aq )
+ *(SrcPtrQ + 1) += iClip3( -C0, C0, ( R2 + RL0 - (R1 << 1)) >> 1 ) ; // adjusts R1 in place using the unfiltered L0/R0
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/Src/h264dec/ldecod/src/fmo.c b/Src/h264dec/ldecod/src/fmo.c
new file mode 100644
index 00000000..cb32230e
--- /dev/null
+++ b/Src/h264dec/ldecod/src/fmo.c
@@ -0,0 +1,552 @@
+
+/*!
+ *****************************************************************************
+ *
+ * \file fmo.c
+ *
+ * \brief
+ * Support for Flexible Macroblock Ordering (FMO)
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger stewe@cs.tu-berlin.de
+ * - Karsten Suehring suehring@hhi.de
+ ******************************************************************************
+ */
+
+#include "global.h"
+#include "elements.h"
+#include "defines.h"
+#include "header.h"
+#include "fmo.h"
+
+//#define PRINT_FMO_MAPS
+
+static void FmoGenerateType0MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); // type 0: interleaved
+static void FmoGenerateType1MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); // type 1: dispersed
+static void FmoGenerateType2MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); // type 2: foreground with left-over
+static void FmoGenerateType3MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); // type 3: box-out
+static void FmoGenerateType4MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); // type 4: raster scan
+static void FmoGenerateType5MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); // type 5: wipe
+static void FmoGenerateType6MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits ); // type 6: explicit
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generates p_Vid->MapUnitToSliceGroupMap.
+ *    Has to be called every time a new Picture Parameter Set is used.
+ *
+ *    The map has one int per map unit, where the number of map units is
+ *    (pic_height_in_map_units_minus1 + 1) * (pic_width_in_mbs_minus1 + 1)
+ *    of the active SPS. A previously generated map is released first.
+ *    With a single slice group the map is zero-filled; otherwise the
+ *    generator matching pps->slice_group_map_type fills it.
+ *
+ *    error() is called when slice_group_map_type is 6 and the PPS map
+ *    size disagrees with the SPS. The process exits when the allocation
+ *    fails or the map type is not in 0..6.
+ *
+ * \param p_Vid
+ *    decoder state holding the active SPS/PPS and the map pointer
+ *
+ * \return
+ *    always 0
+ ************************************************************************
+ */
+static int FmoGenerateMapUnitToSliceGroupMap (VideoParameters *p_Vid)
+{
+  seq_parameter_set_rbsp_t* sps = p_Vid->active_sps;
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+
+  unsigned int NumSliceGroupMapUnits;
+
+  NumSliceGroupMapUnits = (sps->pic_height_in_map_units_minus1+1)* (sps->pic_width_in_mbs_minus1+1);
+
+  // the explicit map (type 6) carries its own size, which must match the SPS
+  if (pps->slice_group_map_type == 6 &&
+      (pps->pic_size_in_map_units_minus1 + 1) != NumSliceGroupMapUnits)
+  {
+    error ("wrong pps->pic_size_in_map_units_minus1 for used SPS and FMO type 6", 500);
+  }
+
+  // allocate memory for p_Vid->MapUnitToSliceGroupMap
+  // (free(NULL) is a no-op, so no guard is needed)
+  free (p_Vid->MapUnitToSliceGroupMap);
+  if ((p_Vid->MapUnitToSliceGroupMap = malloc ((NumSliceGroupMapUnits) * sizeof (int))) == NULL)
+  {
+    // report the size that was actually requested
+    printf ("cannot allocate %d bytes for p_Vid->MapUnitToSliceGroupMap, exit\n", (int) (NumSliceGroupMapUnits * sizeof (int)));
+    exit (-1);
+  }
+
+  if (pps->num_slice_groups_minus1 == 0)    // only one slice group
+  {
+    memset (p_Vid->MapUnitToSliceGroupMap, 0, NumSliceGroupMapUnits * sizeof (int));
+    return 0;
+  }
+
+  switch (pps->slice_group_map_type)
+  {
+  case 0:
+    FmoGenerateType0MapUnitMap (p_Vid, NumSliceGroupMapUnits);
+    break;
+  case 1:
+    FmoGenerateType1MapUnitMap (p_Vid, NumSliceGroupMapUnits);
+    break;
+  case 2:
+    FmoGenerateType2MapUnitMap (p_Vid, NumSliceGroupMapUnits);
+    break;
+  case 3:
+    FmoGenerateType3MapUnitMap (p_Vid, NumSliceGroupMapUnits);
+    break;
+  case 4:
+    FmoGenerateType4MapUnitMap (p_Vid, NumSliceGroupMapUnits);
+    break;
+  case 5:
+    FmoGenerateType5MapUnitMap (p_Vid, NumSliceGroupMapUnits);
+    break;
+  case 6:
+    FmoGenerateType6MapUnitMap (p_Vid, NumSliceGroupMapUnits);
+    break;
+  default:
+    printf ("Illegal slice_group_map_type %d , exit \n", (int) pps->slice_group_map_type);
+    exit (-1);
+  }
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generates p_Vid->MbToSliceGroupMap from p_Vid->MapUnitToSliceGroupMap.
+ *
+ *    The previous map is released and a new one with PicSizeInMbs int
+ *    entries is allocated; the process exits if that fails. The map unit
+ *    used for macroblock mb is
+ *      - mb itself when frame_mbs_only_flag or field_pic_flag is set,
+ *      - mb / 2 when mb_adaptive_frame_field_flag is set (frame picture),
+ *      - the unit in the same column with the row number halved otherwise.
+ *
+ * \param p_Vid
+ *    decoder state holding the active SPS, both maps and picture sizes
+ *
+ * \return
+ *    always 0
+ ************************************************************************
+ */
+static int FmoGenerateMbToSliceGroupMap (VideoParameters *p_Vid)
+{
+  seq_parameter_set_rbsp_t* sps = p_Vid->active_sps;
+  unsigned mb;
+
+  // replace the map of the previous picture (free(NULL) does nothing)
+  free (p_Vid->MbToSliceGroupMap);
+
+  p_Vid->MbToSliceGroupMap = malloc ((p_Vid->PicSizeInMbs) * sizeof (int));
+  if (p_Vid->MbToSliceGroupMap == NULL)
+  {
+    printf ("cannot allocate %d bytes for p_Vid->MbToSliceGroupMap, exit\n", (int) ((p_Vid->PicSizeInMbs) * sizeof (int)));
+    exit (-1);
+  }
+
+  // one macroblock per map unit
+  if ((sps->frame_mbs_only_flag)|| p_Vid->field_pic_flag)
+  {
+    mb = 0;
+    while (mb < p_Vid->PicSizeInMbs)
+    {
+      p_Vid->MbToSliceGroupMap[mb] = p_Vid->MapUnitToSliceGroupMap[mb];
+      mb++;
+    }
+    return 0;
+  }
+
+  // two consecutive macroblock addresses share one map unit
+  if (sps->mb_adaptive_frame_field_flag && (!p_Vid->field_pic_flag))
+  {
+    mb = 0;
+    while (mb < p_Vid->PicSizeInMbs)
+    {
+      p_Vid->MbToSliceGroupMap[mb] = p_Vid->MapUnitToSliceGroupMap[mb/2];
+      mb++;
+    }
+    return 0;
+  }
+
+  // remaining case: two macroblock rows share one row of map units
+  mb = 0;
+  while (mb < p_Vid->PicSizeInMbs)
+  {
+    p_Vid->MbToSliceGroupMap[mb] = p_Vid->MapUnitToSliceGroupMap[(mb/(2*p_Vid->PicWidthInMbs))*p_Vid->PicWidthInMbs+(mb%p_Vid->PicWidthInMbs)];
+    mb++;
+  }
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FMO initialization: builds p_Vid->MapUnitToSliceGroupMap and
+ *    p_Vid->MbToSliceGroupMap and stores the number of slice groups of
+ *    the active PPS in p_Vid->NumberOfSliceGroups.
+ *
+ *    When PRINT_FMO_MAPS is defined both maps are printed to stdout, one
+ *    character per entry (48 + slice group number).
+ *
+ * \param p_Vid
+ *    decoder state for the current picture
+ *
+ * \return
+ *    always 0; the return values of the two generators are not checked
+ ************************************************************************
+ */
+int fmo_init(VideoParameters *p_Vid)
+{
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+
+#ifdef PRINT_FMO_MAPS
+  unsigned col, row;
+#endif
+
+  FmoGenerateMapUnitToSliceGroupMap(p_Vid);
+  FmoGenerateMbToSliceGroupMap(p_Vid);
+
+  p_Vid->NumberOfSliceGroups = pps->num_slice_groups_minus1 + 1;
+
+#ifdef PRINT_FMO_MAPS
+  printf("\n");
+  printf("FMO Map (Units):\n");
+
+  row = 0;
+  while (row < p_Vid->PicHeightInMapUnits)
+  {
+    for (col = 0; col < p_Vid->PicWidthInMbs; col++)
+      printf("%c",48+p_Vid->MapUnitToSliceGroupMap[col+row*p_Vid->PicWidthInMbs]);
+    printf("\n");
+    row++;
+  }
+  printf("\n");
+  printf("FMO Map (Mb):\n");
+
+  row = 0;
+  while (row < p_Vid->PicHeightInMbs)
+  {
+    for (col = 0; col < p_Vid->PicWidthInMbs; col++)
+      printf("%c",48 + p_Vid->MbToSliceGroupMap[col + row * p_Vid->PicWidthInMbs]);
+    printf("\n");
+    row++;
+  }
+  printf("\n");
+
+#endif
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Releases both FMO maps and resets their pointers to NULL, so the
+ *    function may be called repeatedly.
+ *
+ * \param p_Vid
+ *    decoder state owning MbToSliceGroupMap and MapUnitToSliceGroupMap
+ *
+ * \return
+ *    always 0
+ ************************************************************************
+ */
+int FmoFinit(VideoParameters *p_Vid)
+{
+  // free(NULL) is a no-op, so both maps can be released unconditionally
+  free (p_Vid->MbToSliceGroupMap);
+  p_Vid->MbToSliceGroupMap = NULL;
+
+  free (p_Vid->MapUnitToSliceGroupMap);
+  p_Vid->MapUnitToSliceGroupMap = NULL;
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Returns the number of slice groups stored in the decoder state.
+ *
+ * \param p_Vid
+ *    decoder state; NumberOfSliceGroups is read
+ *
+ * \return
+ *    p_Vid->NumberOfSliceGroups
+ ************************************************************************
+ */
+int FmoGetNumberOfSliceGroup(VideoParameters *p_Vid)
+{
+  int group_count = p_Vid->NumberOfSliceGroups;
+
+  return group_count;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Returns the number of the last macroblock that belongs to the
+ *    highest-numbered slice group. This happens to be the last
+ *    macroblock of the picture if there is only one slice group.
+ *
+ * \param p_Vid
+ *    decoder state with a valid MbToSliceGroupMap
+ *
+ * \return
+ *    result of FmoGetLastMBInSliceGroup() for slice group
+ *    FmoGetNumberOfSliceGroup() - 1
+ ************************************************************************
+ */
+int FmoGetLastMBOfPicture(VideoParameters *p_Vid)
+{
+  int last_group = FmoGetNumberOfSliceGroup(p_Vid) - 1;
+
+  return FmoGetLastMBInSliceGroup (p_Vid, last_group);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Searches the picture backwards for the last macroblock that is
+ *    mapped to the given slice group.
+ *
+ * \param p_Vid
+ *    decoder state with a valid MbToSliceGroupMap
+ * \param SliceGroup
+ *    slice group number to look for
+ *
+ * \return
+ *    macroblock number of the last MB in the slice group, or -1 when no
+ *    macroblock of the picture belongs to it
+ ************************************************************************
+ */
+
+int FmoGetLastMBInSliceGroup (VideoParameters *p_Vid, int SliceGroup)
+{
+  int mb = p_Vid->PicSizeInMbs-1;
+
+  while (mb >= 0)
+  {
+    if (FmoGetSliceGroupId (p_Vid, mb) == SliceGroup)
+      return mb;
+    mb--;
+  }
+
+  return -1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Looks up the slice group of a macroblock in p_Vid->MbToSliceGroupMap.
+ *
+ *    Assertions check that mb is below PicSizeInMbs and that the map has
+ *    been allocated.
+ *    NOTE(review): a negative mb is not rejected by the assertions.
+ *
+ * \param p_Vid
+ *    decoder state for the current picture
+ * \param mb
+ *    Macroblock number (in scan order)
+ *
+ * \return
+ *    slice group number stored for mb
+ ************************************************************************
+ */
+int FmoGetSliceGroupId (VideoParameters *p_Vid, int mb)
+{
+  int group;
+
+  assert (mb < (int) p_Vid->PicSizeInMbs);
+  assert (p_Vid->MbToSliceGroupMap != NULL);
+
+  group = p_Vid->MbToSliceGroupMap[mb];
+  return group;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoGetNextMBNr: Returns the MB number (in scan order) of the next
+ *    MB that lies in the same slice group as the current one.
+ *
+ * \param p_Vid
+ *    decoder state with a valid MbToSliceGroupMap
+ * \param CurrentMbNr
+ *    number of the current macroblock
+ *
+ * \return
+ *    number of the next macroblock of the (scattered) slice group, or -1
+ *    when no further macroblock of that group follows in the picture
+ ************************************************************************
+ */
+int FmoGetNextMBNr (VideoParameters *p_Vid, int CurrentMbNr)
+{
+  const int group = FmoGetSliceGroupId (p_Vid, CurrentMbNr);
+  int next;
+
+  for (next = CurrentMbNr + 1; next < (int)p_Vid->PicSizeInMbs; next++)
+  {
+    if (p_Vid->MbToSliceGroupMap [next] == group)
+      return next;
+  }
+
+  return -1;    // No further MB in this slice (could be end of picture)
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate interleaved slice group map type MapUnit map (type 0)
+ *
+ *    The slice groups are visited cyclically; each visit assigns a run
+ *    of pps->run_length_minus1[group] + 1 consecutive map units to the
+ *    group, truncated at the end of the map.
+ *
+ * \param p_Vid
+ *    decoder state; the active PPS is read and MapUnitToSliceGroupMap
+ *    is written
+ * \param PicSizeInMapUnits
+ *    number of entries to fill
+ ************************************************************************
+ */
+static void FmoGenerateType0MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits )
+{
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+  unsigned group, offset;
+  unsigned start = 0;      // first map unit of the current run
+
+  do
+  {
+    // one full cycle through the slice groups
+    group = 0;
+    while ((group <= pps->num_slice_groups_minus1) && (start < PicSizeInMapUnits))
+    {
+      for( offset = 0; offset <= pps->run_length_minus1[ group ] && start + offset < PicSizeInMapUnits; offset++ )
+        p_Vid->MapUnitToSliceGroupMap[start+offset] = group;
+
+      start += pps->run_length_minus1[group] + 1;
+      group++;
+    }
+  }
+  while( start < PicSizeInMapUnits );
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate dispersed slice group map type MapUnit map (type 1)
+ *
+ *    With n = num_slice_groups_minus1 + 1, the unit in column c of row r
+ *    is assigned to slice group (c + (r * n) / 2) % n.
+ *
+ * \param p_Vid
+ *    decoder state; PicWidthInMbs and the active PPS are read and
+ *    MapUnitToSliceGroupMap is written
+ * \param PicSizeInMapUnits
+ *    number of entries to fill
+ ************************************************************************
+ */
+static void FmoGenerateType1MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits )
+{
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+  unsigned unit = 0;
+
+  while (unit < PicSizeInMapUnits)
+  {
+    unsigned col = unit % p_Vid->PicWidthInMbs;
+    unsigned row = unit / p_Vid->PicWidthInMbs;
+
+    p_Vid->MapUnitToSliceGroupMap[unit] = (col + ((row * (pps->num_slice_groups_minus1+1)) / 2))
+                                          % (pps->num_slice_groups_minus1+1);
+    unit++;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate foreground with left-over slice group map type MapUnit map (type 2)
+ *
+ *    Every map unit first receives the last slice group
+ *    (num_slice_groups_minus1). The rectangles described by
+ *    pps->top_left[] / pps->bottom_right[] are then painted from the
+ *    highest group number down to group 0, so a lower group number wins
+ *    where rectangles overlap.
+ *
+ *    The rectangle corners come from the bitstream. Writes are limited
+ *    to the first PicSizeInMapUnits entries of the map, so a corner that
+ *    lies outside the picture cannot write past the end of the map.
+ *
+ * \param p_Vid
+ *    decoder state; PicWidthInMbs and the active PPS are read and
+ *    MapUnitToSliceGroupMap is written
+ * \param PicSizeInMapUnits
+ *    number of entries in MapUnitToSliceGroupMap
+ ************************************************************************
+ */
+static void FmoGenerateType2MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits )
+{
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+  int iGroup;
+  unsigned i, x, y;
+  unsigned yTopLeft, xTopLeft, yBottomRight, xBottomRight;
+
+  // background: everything belongs to the left-over slice group
+  for( i = 0; i < PicSizeInMapUnits; i++ )
+    p_Vid->MapUnitToSliceGroupMap[ i ] = pps->num_slice_groups_minus1;
+
+  for( iGroup = pps->num_slice_groups_minus1 - 1 ; iGroup >= 0; iGroup-- )
+  {
+    yTopLeft = pps->top_left[ iGroup ] / p_Vid->PicWidthInMbs;
+    xTopLeft = pps->top_left[ iGroup ] % p_Vid->PicWidthInMbs;
+    yBottomRight = pps->bottom_right[ iGroup ] / p_Vid->PicWidthInMbs;
+    xBottomRight = pps->bottom_right[ iGroup ] % p_Vid->PicWidthInMbs;
+
+    for( y = yTopLeft; y <= yBottomRight; y++ )
+    {
+      // y * PicWidthInMbs never exceeds bottom_right, so it cannot wrap
+      unsigned rowStart = y * p_Vid->PicWidthInMbs;
+
+      // this row and all following ones lie completely outside the map
+      if( rowStart >= PicSizeInMapUnits )
+        break;
+
+      for( x = xTopLeft; x <= xBottomRight; x++ )
+      {
+        unsigned unit = rowStart + x;
+
+        if( unit < PicSizeInMapUnits )
+          p_Vid->MapUnitToSliceGroupMap[ unit ] = iGroup;
+      }
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * Generate box-out slice group map type MapUnit map (type 3)
+ *
+ * All map units are first marked with 2 ("vacant"). Starting near the
+ * picture centre, the position (x, y) walks outwards along a growing
+ * box. Every vacant unit that is reached is assigned to slice group 0
+ * while fewer than mapUnitsInSliceGroup0 units have been assigned, and
+ * to slice group 1 afterwards. The walk ends once PicSizeInMapUnits
+ * units have been assigned.
+ *
+ * \param p_Vid
+ * decoder state; picture dimensions, slice_group_change_cycle and the
+ * active PPS are read, MapUnitToSliceGroupMap is written
+ * \param PicSizeInMapUnits
+ * number of entries in MapUnitToSliceGroupMap
+ ************************************************************************
+ */
+static void FmoGenerateType3MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits )
+{
+ pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+ unsigned i, k;
+ int leftBound, topBound, rightBound, bottomBound;
+ int x, y, xDir, yDir;
+ int mapUnitVacant;
+
+ unsigned mapUnitsInSliceGroup0 = imin((pps->slice_group_change_rate_minus1 + 1) * p_Vid->slice_group_change_cycle, PicSizeInMapUnits); // size of slice group 0, capped at the picture size
+
+ for( i = 0; i < PicSizeInMapUnits; i++ )
+ p_Vid->MapUnitToSliceGroupMap[ i ] = 2; // 2 marks a unit that has not been assigned yet
+
+ x = ( p_Vid->PicWidthInMbs - pps->slice_group_change_direction_flag ) / 2; // start position of the walk
+ y = ( p_Vid->PicHeightInMapUnits - pps->slice_group_change_direction_flag ) / 2;
+
+ leftBound = x; // the box initially consists of the start unit only
+ topBound = y;
+ rightBound = x;
+ bottomBound = y;
+
+ xDir = pps->slice_group_change_direction_flag - 1; // initial direction depends on the direction flag
+ yDir = pps->slice_group_change_direction_flag;
+
+ for( k = 0; k < PicSizeInMapUnits; k += mapUnitVacant ) // k counts assigned units; it advances only when a vacant unit was hit
+ {
+ mapUnitVacant = ( p_Vid->MapUnitToSliceGroupMap[ y * p_Vid->PicWidthInMbs + x ] == 2 );
+ if( mapUnitVacant )
+ p_Vid->MapUnitToSliceGroupMap[ y * p_Vid->PicWidthInMbs + x ] = ( k >= mapUnitsInSliceGroup0 ); // 0 for the first mapUnitsInSliceGroup0 units, 1 afterwards
+
+ if( xDir == -1 && x == leftBound ) // reached the left side: widen the box (clamped to column 0) and turn
+ {
+ leftBound = imax( leftBound - 1, 0 );
+ x = leftBound;
+ xDir = 0;
+ yDir = 2 * pps->slice_group_change_direction_flag - 1;
+ }
+ else
+ if( xDir == 1 && x == rightBound ) // reached the right side: widen the box (clamped to the last column) and turn
+ {
+ rightBound = imin( rightBound + 1, (int)p_Vid->PicWidthInMbs - 1 );
+ x = rightBound;
+ xDir = 0;
+ yDir = 1 - 2 * pps->slice_group_change_direction_flag;
+ }
+ else
+ if( yDir == -1 && y == topBound ) // reached the top side: grow the box (clamped to row 0) and turn
+ {
+ topBound = imax( topBound - 1, 0 );
+ y = topBound;
+ xDir = 1 - 2 * pps->slice_group_change_direction_flag;
+ yDir = 0;
+ }
+ else
+ if( yDir == 1 && y == bottomBound ) // reached the bottom side: grow the box (clamped to the last row) and turn
+ {
+ bottomBound = imin( bottomBound + 1, (int)p_Vid->PicHeightInMapUnits - 1 );
+ y = bottomBound;
+ xDir = 2 * pps->slice_group_change_direction_flag - 1;
+ yDir = 0;
+ }
+ else // no side reached: take one step in the current direction
+ {
+ x = x + xDir;
+ y = y + yDir;
+ }
+ }
+
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate raster scan slice group map type MapUnit map (type 4)
+ *
+ *    The map is split in raster-scan order into a leading part that
+ *    receives slice_group_change_direction_flag and a trailing part that
+ *    receives 1 - slice_group_change_direction_flag. The size of the
+ *    leading part follows from slice_group_change_rate_minus1,
+ *    slice_group_change_cycle and the direction flag.
+ *
+ * \param p_Vid
+ *    decoder state; slice_group_change_cycle and the active PPS are read
+ *    and MapUnitToSliceGroupMap is written
+ * \param PicSizeInMapUnits
+ *    number of entries to fill
+ ************************************************************************
+ */
+static void FmoGenerateType4MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits )
+{
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+
+  unsigned group0_units = imin((pps->slice_group_change_rate_minus1 + 1) * p_Vid->slice_group_change_cycle, PicSizeInMapUnits);
+  unsigned upper_left_units = group0_units;
+  unsigned unit = 0;
+
+  if (pps->slice_group_change_direction_flag)
+    upper_left_units = PicSizeInMapUnits - group0_units;
+
+  // leading (upper-left) part of the picture
+  while (unit < PicSizeInMapUnits && unit < upper_left_units)
+  {
+    p_Vid->MapUnitToSliceGroupMap[ unit ] = pps->slice_group_change_direction_flag;
+    unit++;
+  }
+
+  // whatever remains belongs to the other slice group
+  while (unit < PicSizeInMapUnits)
+  {
+    p_Vid->MapUnitToSliceGroupMap[ unit ] = 1 - pps->slice_group_change_direction_flag;
+    unit++;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate wipe slice group map type MapUnit map (type 5)
+ *
+ *    The map units are visited column by column (top to bottom inside a
+ *    column). The first sizeOfUpperLeftGroup units visited receive
+ *    slice_group_change_direction_flag, all later ones
+ *    1 - slice_group_change_direction_flag.
+ *
+ * \param p_Vid
+ *    decoder state; picture dimensions, slice_group_change_cycle and the
+ *    active PPS are read and MapUnitToSliceGroupMap is written
+ * \param PicSizeInMapUnits
+ *    total number of map units, used to size the two groups
+ ************************************************************************
+ */
+static void FmoGenerateType5MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits )
+{
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+
+  unsigned group0_units = imin((pps->slice_group_change_rate_minus1 + 1) * p_Vid->slice_group_change_cycle, PicSizeInMapUnits);
+  unsigned upper_left_units = group0_units;
+  unsigned row, col;
+  unsigned visited = 0;    // units handled so far, in column-major order
+
+  if (pps->slice_group_change_direction_flag)
+    upper_left_units = PicSizeInMapUnits - group0_units;
+
+  for (col = 0; col < p_Vid->PicWidthInMbs; col++)
+  {
+    for (row = 0; row < p_Vid->PicHeightInMapUnits; row++)
+    {
+      unsigned unit = row * p_Vid->PicWidthInMbs + col;
+
+      if (visited < upper_left_units)
+        p_Vid->MapUnitToSliceGroupMap[ unit ] = pps->slice_group_change_direction_flag;
+      else
+        p_Vid->MapUnitToSliceGroupMap[ unit ] = 1 - pps->slice_group_change_direction_flag;
+
+      visited++;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate explicit slice group map type MapUnit map (type 6)
+ *
+ *    Copies pps->slice_group_id[] entry by entry into the map.
+ *
+ * \param p_Vid
+ *    decoder state; the active PPS is read and MapUnitToSliceGroupMap
+ *    is written
+ * \param PicSizeInMapUnits
+ *    number of entries to copy
+ ************************************************************************
+ */
+static void FmoGenerateType6MapUnitMap (VideoParameters *p_Vid, unsigned PicSizeInMapUnits )
+{
+  pic_parameter_set_rbsp_t* pps = p_Vid->active_pps;
+  unsigned unit = 0;
+
+  while (unit < PicSizeInMapUnits)
+  {
+    p_Vid->MapUnitToSliceGroupMap[unit] = pps->slice_group_id[unit];
+    unit++;
+  }
+}
+
diff --git a/Src/h264dec/ldecod/src/header.c b/Src/h264dec/ldecod/src/header.c
new file mode 100644
index 00000000..1823d82b
--- /dev/null
+++ b/Src/h264dec/ldecod/src/header.c
@@ -0,0 +1,857 @@
+
+/*!
+ *************************************************************************************
+ * \file header.c
+ *
+ * \brief
+ * H.264 Slice headers
+ *
+ *************************************************************************************
+ */
+
+#include "global.h"
+#include "elements.h"
+#include "defines.h"
+#include "fmo.h"
+#include "vlc.h"
+#include "mbuffer.h"
+#include "header.h"
+
+#include "ctx_tables.h"
+
+
+#if TRACE // trace builds: copy the syntax-element name into sym.tracestring
+#define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE) // expects a variable named sym at the call site; NOTE(review): strncpy leaves the buffer unterminated when s has TRACESTRING_SIZE or more characters
+#else
+#define SYMTRACESTRING(s) // do nothing
+#endif
+
+static void ref_pic_list_reordering(Slice *currSlice);
+static void pred_weight_table(Slice *currSlice);
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    calculate Ceil(Log2(uiVal))
+ *
+ *    The result is the number of bits needed to represent uiVal - 1,
+ *    e.g. 0 for 1, 1 for 2, 2 for 3 and 4, 3 for 5..8.
+ *    For uiVal == 0 the subtraction wraps around and the bit width of
+ *    unsigned is returned.
+ *
+ * \param uiVal
+ *    value to take the logarithm of
+ *
+ * \return
+ *    Ceil(Log2(uiVal)) for uiVal >= 1
+ ************************************************************************
+ */
+unsigned CeilLog2( unsigned uiVal)
+{
+  unsigned bits = 0;
+  unsigned rest;
+
+  // count how often uiVal - 1 can be halved before it reaches zero
+  for (rest = uiVal - 1; rest != 0; rest >>= 1)
+    bits++;
+
+  return bits;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    calculate Ceil(Log2(uiVal)); performs the same computation as
+ *    CeilLog2(), i.e. it counts the bits needed to represent uiVal - 1.
+ *    For uiVal == 0 the subtraction wraps around and the bit width of
+ *    unsigned is returned.
+ ************************************************************************
+ */
+unsigned CeilLog2_sf( unsigned uiVal)
+{
+  unsigned remaining = uiVal - 1;
+  unsigned count;
+
+  for (count = 0; remaining > 0; count++)
+    remaining >>= 1;
+
+  return count;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reads the first part of the slice header: first_mb_in_slice,
+ *    slice_type, pic_parameter_set_id and, when
+ *    separate_colour_plane_flag is set, colour_plane_id.
+ *
+ *    slice_type values above 4 are reduced by 5 before they are stored
+ *    in currSlice->slice_type and p_Vid->type. Without separate colour
+ *    planes colour_plane_id is set to PLANE_Y.
+ *
+ * \param currSlice
+ *    slice being parsed; the header bitstream is taken from the data
+ *    partition assigned to SE_HEADER
+ ************************************************************************
+ */
+void FirstPartOfSliceHeader(Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  byte header_part = assignSE2partition[currSlice->dp_mode][SE_HEADER];
+  Bitstream *bs = currSlice->partArr[header_part].bitstream;
+  int raw_type;
+
+  // Get first_mb_in_slice
+  currSlice->start_mb_nr = ue_v ("SH: first_mb_in_slice", bs);
+
+  raw_type = ue_v ("SH: slice_type", bs);
+  raw_type = (raw_type > 4) ? raw_type - 5 : raw_type;
+
+  currSlice->slice_type = (SliceType) raw_type;
+  p_Vid->type = currSlice->slice_type;
+
+  currSlice->pic_parameter_set_id = ue_v ("SH: pic_parameter_set_id", bs);
+
+  if( !p_Vid->separate_colour_plane_flag )
+  {
+    p_Vid->colour_plane_id = PLANE_Y;
+  }
+  else
+  {
+    p_Vid->colour_plane_id = u_v (2, "SH: colour_plane_id", bs);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * read the second part of the slice header (everything after the
+ * pic_parameter_set_id) from the SE_HEADER data partition and store
+ * the values in currSlice and p_Vid. The syntax elements are read in
+ * the order of the statements below.
+ *
+ * error() is called when slice_qp_delta or slice_qs_delta move the
+ * resulting QP / QS out of range.
+ *
+ * \param currSlice
+ * slice being parsed; p_Vid->active_sps and p_Vid->active_pps are read
+ *
+ * \note
+ * the function returns nothing (void)
+ ************************************************************************
+ */
+void RestOfSliceHeader(Slice *currSlice)
+{
+ VideoParameters *p_Vid = currSlice->p_Vid;
+ InputParameters *p_Inp = currSlice->p_Inp;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ byte dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER];
+ DataPartition *partition = &(currSlice->partArr[dP_nr]);
+ Bitstream *currStream = partition->bitstream;
+
+ int val, len;
+
+ p_Vid->frame_num = u_v (active_sps->log2_max_frame_num_minus4 + 4, "SH: frame_num", currStream);
+
+ /* Tian Dong: frame_num gap processing, if found */
+ if (p_Vid->idr_flag)
+ {
+ p_Vid->pre_frame_num = p_Vid->frame_num;
+ // picture error concealment
+ p_Vid->last_ref_pic_poc = 0;
+ assert(p_Vid->frame_num == 0); // debug-only check; compiled out when NDEBUG is defined
+ }
+
+ if (active_sps->frame_mbs_only_flag)
+ {
+ p_Vid->structure = FRAME;
+ p_Vid->field_pic_flag=0;
+ }
+ else
+ {
+ // field_pic_flag u(1)
+ p_Vid->field_pic_flag = u_1("SH: field_pic_flag", currStream);
+ if (p_Vid->field_pic_flag)
+ {
+ // bottom_field_flag u(1)
+ p_Vid->bottom_field_flag = (byte)u_1("SH: bottom_field_flag", currStream);
+ p_Vid->structure = p_Vid->bottom_field_flag ? BOTTOM_FIELD : TOP_FIELD;
+ }
+ else
+ {
+ p_Vid->structure = FRAME;
+ p_Vid->bottom_field_flag = FALSE;
+ }
+ }
+
+ currSlice->structure = (PictureStructure) p_Vid->structure;
+
+ p_Vid->mb_aff_frame_flag=(active_sps->mb_adaptive_frame_field_flag && (p_Vid->field_pic_flag==0)); // MBAFF applies to frame pictures only
+ currSlice->mb_aff_frame_flag = p_Vid->mb_aff_frame_flag;
+
+ if (p_Vid->structure == FRAME )
+ assert (p_Vid->field_pic_flag == 0);
+ if (p_Vid->structure == TOP_FIELD )
+ assert (p_Vid->field_pic_flag == 1 && (p_Vid->bottom_field_flag == FALSE));
+ if (p_Vid->structure == BOTTOM_FIELD)
+ assert (p_Vid->field_pic_flag == 1 && (p_Vid->bottom_field_flag == TRUE ));
+
+ if (p_Vid->idr_flag)
+ {
+ p_Vid->idr_pic_id = ue_v("SH: idr_pic_id", currStream);
+ }
+
+ if (active_sps->pic_order_cnt_type == 0)
+ {
+ p_Vid->pic_order_cnt_lsb = u_v(active_sps->log2_max_pic_order_cnt_lsb_minus4 + 4, "SH: pic_order_cnt_lsb", currStream);
+ if( p_Vid->active_pps->bottom_field_pic_order_in_frame_present_flag == 1 && !p_Vid->field_pic_flag )
+ p_Vid->delta_pic_order_cnt_bottom = se_v("SH: delta_pic_order_cnt_bottom", currStream);
+ else
+ p_Vid->delta_pic_order_cnt_bottom = 0;
+ }
+ if( active_sps->pic_order_cnt_type == 1 && !active_sps->delta_pic_order_always_zero_flag )
+ {
+ p_Vid->delta_pic_order_cnt[ 0 ] = se_v("SH: delta_pic_order_cnt[0]", currStream);
+ if( p_Vid->active_pps->bottom_field_pic_order_in_frame_present_flag == 1 && !p_Vid->field_pic_flag )
+ p_Vid->delta_pic_order_cnt[ 1 ] = se_v("SH: delta_pic_order_cnt[1]", currStream); // NOTE(review): delta_pic_order_cnt[1] is not written on this path when the condition is false
+ }else
+ {
+ if (active_sps->pic_order_cnt_type == 1) // type 1 with delta_pic_order_always_zero_flag set: both deltas are zero
+ {
+ p_Vid->delta_pic_order_cnt[ 0 ] = 0;
+ p_Vid->delta_pic_order_cnt[ 1 ] = 0;
+ }
+ }
+
+ //! redundant_pic_cnt: read only when the PPS signals that it is present
+ if (p_Vid->active_pps->redundant_pic_cnt_present_flag)
+ {
+ p_Vid->redundant_pic_cnt = ue_v ("SH: redundant_pic_cnt", currStream);
+ }
+
+ if(currSlice->slice_type == B_SLICE)
+ {
+ currSlice->direct_spatial_mv_pred_flag = u_1 ("SH: direct_spatial_mv_pred_flag", currStream);
+ }
+
+ currSlice->num_ref_idx_l0_active = p_Vid->active_pps->num_ref_idx_l0_active_minus1 + 1; // PPS defaults, possibly overridden below
+ currSlice->num_ref_idx_l1_active = p_Vid->active_pps->num_ref_idx_l1_active_minus1 + 1;
+
+ if(p_Vid->type==P_SLICE || p_Vid->type == SP_SLICE || p_Vid->type==B_SLICE)
+ {
+ val = u_1 ("SH: num_ref_idx_override_flag", currStream);
+ if (val)
+ {
+ currSlice->num_ref_idx_l0_active = 1 + ue_v ("SH: num_ref_idx_l0_active_minus1", currStream);
+
+ if(p_Vid->type==B_SLICE)
+ {
+ currSlice->num_ref_idx_l1_active = 1 + ue_v ("SH: num_ref_idx_l1_active_minus1", currStream);
+ }
+ }
+ }
+ if (currSlice->slice_type!=B_SLICE)
+ {
+ currSlice->num_ref_idx_l1_active = 0; // list 1 is only used by B slices
+ }
+
+ ref_pic_list_reordering(currSlice);
+
+ currSlice->apply_weights = ((p_Vid->active_pps->weighted_pred_flag && (currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE) )
+ || ((p_Vid->active_pps->weighted_bipred_idc > 0 ) && (currSlice->slice_type == B_SLICE)));
+
+ if ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) // a weight table is parsed for B slices only when weighted_bipred_idc is exactly 1
+ {
+ pred_weight_table(currSlice);
+ }
+
+ if (p_Vid->nal_reference_idc)
+ dec_ref_pic_marking(p_Vid, currStream);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag && p_Vid->type!=I_SLICE && p_Vid->type!=SI_SLICE)
+ {
+ currSlice->model_number = ue_v("SH: cabac_init_idc", currStream);
+ }
+ else
+ {
+ currSlice->model_number = 0;
+ }
+
+ currSlice->slice_qp_delta = val = se_v("SH: slice_qp_delta", currStream);
+ currSlice->qp = p_Vid->qp = 26 + p_Vid->active_pps->pic_init_qp_minus26 + val;
+
+ if ((p_Vid->qp < -p_Vid->bitdepth_luma_qp_scale) || (p_Vid->qp > 51))
+ error ("slice_qp_delta makes slice_qp_y out of range", 500);
+
+ if(p_Vid->type==SP_SLICE || p_Vid->type == SI_SLICE)
+ {
+ if(p_Vid->type==SP_SLICE)
+ {
+ p_Vid->sp_switch = u_1 ("SH: sp_for_switch_flag", currStream);
+ }
+ currSlice->slice_qs_delta = val = se_v("SH: slice_qs_delta", currStream);
+ currSlice->qs = 26 + p_Vid->active_pps->pic_init_qs_minus26 + val;
+ if ((currSlice->qs < 0) || (currSlice->qs > 51))
+ error ("slice_qs_delta makes slice_qs_y out of range", 500);
+ }
+
+ if ( !HI_INTRA_ONLY_PROFILE || (HI_INTRA_ONLY_PROFILE && (p_Inp->intra_profile_deblocking == 1) ))
+ //then read flags and parameters from bitstream
+ {
+ if (p_Vid->active_pps->deblocking_filter_control_present_flag)
+ {
+ currSlice->DFDisableIdc = (short)ue_v ("SH: disable_deblocking_filter_idc", currStream);
+
+ if (currSlice->DFDisableIdc!=1)
+ {
+ currSlice->DFAlphaC0Offset = (short) (2 * se_v("SH: slice_alpha_c0_offset_div2", currStream)); // the bitstream carries the offsets divided by 2
+ currSlice->DFBetaOffset = (short) (2 * se_v("SH: slice_beta_offset_div2", currStream));
+ }
+ else
+ {
+ currSlice->DFAlphaC0Offset = currSlice->DFBetaOffset = 0;
+ }
+ }
+ else
+ {
+ currSlice->DFDisableIdc = currSlice->DFAlphaC0Offset = currSlice->DFBetaOffset = 0;
+ }
+ }
+ else //By default the Loop Filter is Off
+ { //444_TEMP_NOTE: change made below. 08/07/07
+ //still need to parse the SEs (read flags and parameters from bitstream) but will ignore
+ if (p_Vid->active_pps->deblocking_filter_control_present_flag)
+ {
+ currSlice->DFDisableIdc = (short) ue_v ("SH: disable_deblocking_filter_idc", currStream);
+
+ if (currSlice->DFDisableIdc!=1)
+ {
+ currSlice->DFAlphaC0Offset = (short) (2 * se_v("SH: slice_alpha_c0_offset_div2", currStream));
+ currSlice->DFBetaOffset = (short) (2 * se_v("SH: slice_beta_offset_div2", currStream));
+ }
+ }//444_TEMP_NOTE. the end of change. 08/07/07
+ //Ignore the SEs, by default the Loop Filter is Off
+ currSlice->DFDisableIdc =1;
+ currSlice->DFAlphaC0Offset = currSlice->DFBetaOffset = 0;
+ }
+
+
+ if (p_Vid->active_pps->num_slice_groups_minus1>0 && p_Vid->active_pps->slice_group_map_type>=3 &&
+ p_Vid->active_pps->slice_group_map_type<=5)
+ {
+ len = (active_sps->pic_height_in_map_units_minus1+1)*(active_sps->pic_width_in_mbs_minus1+1)/
+ (p_Vid->active_pps->slice_group_change_rate_minus1+1);
+ if (((active_sps->pic_height_in_map_units_minus1+1)*(active_sps->pic_width_in_mbs_minus1+1))%
+ (p_Vid->active_pps->slice_group_change_rate_minus1+1))
+ len +=1; // round the division up
+
+ len = CeilLog2(len+1); // number of bits used to code slice_group_change_cycle
+
+ p_Vid->slice_group_change_cycle = u_v (len, "SH: slice_group_change_cycle", currStream);
+ }
+ p_Vid->PicHeightInMbs = p_Vid->FrameHeightInMbs / ( 1 + p_Vid->field_pic_flag ); // a field picture has half the frame height
+ p_Vid->PicSizeInMbs = p_Vid->PicWidthInMbs * p_Vid->PicHeightInMbs;
+ p_Vid->FrameSizeInMbs = p_Vid->PicWidthInMbs * p_Vid->FrameHeightInMbs;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    read the reference picture reordering information
+ *
+ *    For list 0 (all slices except I and SI) and list 1 (B slices) the
+ *    reordering flag is read. When it is set, reordering commands are
+ *    read until reordering_of_pic_nums_idc equals 3: commands 0 and 1
+ *    carry abs_diff_pic_num_minus1, command 2 carries long_term_pic_idx.
+ *    For redundant non-I slices p_Vid->redundant_slice_ref_idx is
+ *    derived from the first list 0 command.
+ *
+ *    NOTE(review): the number of commands read is not limited here; the
+ *    capacity of the arrays set up by
+ *    alloc_ref_pic_list_reordering_buffer() is not visible in this
+ *    function - confirm that malformed streams cannot overrun them.
+ *
+ * \param currSlice
+ *    slice being parsed; receives the reordering flags and commands
+ ************************************************************************
+ */
+static void ref_pic_list_reordering(Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  byte dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER];
+  Bitstream *currStream = currSlice->partArr[dP_nr].bitstream;
+  int cmd, idc;
+
+  alloc_ref_pic_list_reordering_buffer(currSlice);
+
+  // list 0: present for every slice type except I and SI
+  if (p_Vid->type!=I_SLICE && p_Vid->type!=SI_SLICE)
+  {
+    idc = currSlice->ref_pic_list_reordering_flag_l0 = u_1 ("SH: ref_pic_list_reordering_flag_l0", currStream);
+
+    if (idc)
+    {
+      for (cmd = 0; ; cmd++)
+      {
+        idc = currSlice->reordering_of_pic_nums_idc_l0[cmd] = ue_v("SH: reordering_of_pic_nums_idc_l0", currStream);
+
+        switch (idc)
+        {
+        case 0:
+        case 1:
+          currSlice->abs_diff_pic_num_minus1_l0[cmd] = ue_v("SH: abs_diff_pic_num_minus1_l0", currStream);
+          break;
+        case 2:
+          currSlice->long_term_pic_idx_l0[cmd] = ue_v("SH: long_term_pic_idx_l0", currStream);
+          break;
+        default:
+          break;
+        }
+
+        // command 3 terminates the list
+        if (idc == 3)
+          break;
+      }
+    }
+  }
+
+  // list 1: B slices only
+  if (p_Vid->type==B_SLICE)
+  {
+    idc = currSlice->ref_pic_list_reordering_flag_l1 = u_1 ("SH: ref_pic_list_reordering_flag_l1", currStream);
+
+    if (idc)
+    {
+      for (cmd = 0; ; cmd++)
+      {
+        idc = currSlice->reordering_of_pic_nums_idc_l1[cmd] = ue_v("SH: reordering_of_pic_nums_idc_l1", currStream);
+
+        switch (idc)
+        {
+        case 0:
+        case 1:
+          currSlice->abs_diff_pic_num_minus1_l1[cmd] = ue_v("SH: abs_diff_pic_num_minus1_l1", currStream);
+          break;
+        case 2:
+          currSlice->long_term_pic_idx_l1[cmd] = ue_v("SH: long_term_pic_idx_l1", currStream);
+          break;
+        default:
+          break;
+        }
+
+        // command 3 terminates the list
+        if (idc == 3)
+          break;
+      }
+    }
+  }
+
+  // set reference index of redundant slices.
+  if(p_Vid->redundant_pic_cnt && (p_Vid->type != I_SLICE) )
+  {
+    p_Vid->redundant_slice_ref_idx = currSlice->abs_diff_pic_num_minus1_l0[0] + 1;
+  }
+}
+
+
+/*
+ * Reset every explicit prediction weight of both reference lists to its
+ * default value 2^log2_weight_denom (luma denominator for component 0,
+ * chroma denominator for components 1 and 2).
+ */
+static void reset_wp_params(Slice *currSlice)
+{
+  const int luma_default   = 1 << currSlice->luma_log2_weight_denom;
+  const int chroma_default = 1 << currSlice->chroma_log2_weight_denom;
+  int list, ref;
+
+  for (ref = 0; ref < MAX_REFERENCE_PICTURES; ++ref)
+  {
+    for (list = 0; list < 2; ++list)
+    {
+      currSlice->wp_weight[list][ref][0] = luma_default;
+      currSlice->wp_weight[list][ref][1] = chroma_default;
+      currSlice->wp_weight[list][ref][2] = chroma_default;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Read the explicit weighted prediction table of the slice header.
+ *
+ * \param currSlice
+ *    Slice being parsed. The function fills luma/chroma_log2_weight_denom,
+ *    wp_round_luma/chroma, and the wp_weight / wp_offset entries of
+ *    list 0 (and of list 1 for B slices when weighted_bipred_idc is 1).
+ *
+ * \note
+ *    Offsets are signed. They are scaled to the coded bit depth by a
+ *    multiplication with 2^(bitdepth-8) rather than by a left shift,
+ *    because left-shifting a negative value is undefined behaviour in C.
+ *    NOTE(review): the log2 weight denominators are used as shift counts
+ *    without a range check here - confirm they are validated elsewhere.
+ ************************************************************************
+ */
+static void pred_weight_table(Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+  byte dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER];
+  DataPartition *partition = &(currSlice->partArr[dP_nr]);
+  Bitstream *currStream = partition->bitstream;
+  int luma_weight_flag_l0, luma_weight_flag_l1, chroma_weight_flag_l0, chroma_weight_flag_l1;
+  int i,j;
+
+  currSlice->luma_log2_weight_denom = ue_v ("SH: luma_log2_weight_denom", currStream);
+  currSlice->wp_round_luma = currSlice->luma_log2_weight_denom ? 1<<(currSlice->luma_log2_weight_denom - 1): 0;
+
+  if ( 0 != active_sps->chroma_format_idc)
+  {
+    currSlice->chroma_log2_weight_denom = ue_v ("SH: chroma_log2_weight_denom", currStream);
+    currSlice->wp_round_chroma = currSlice->chroma_log2_weight_denom ? 1<<(currSlice->chroma_log2_weight_denom - 1): 0;
+  }
+
+  // start from default weights; entries signalled below overwrite them
+  reset_wp_params(currSlice);
+
+  for (i=0; i<currSlice->num_ref_idx_l0_active; i++)
+  {
+    luma_weight_flag_l0 = u_1("SH: luma_weight_flag_l0", currStream);
+
+    if (luma_weight_flag_l0)
+    {
+      currSlice->wp_weight[0][i][0] = se_v ("SH: luma_weight_l0", currStream);
+      currSlice->wp_offset[0][i][0] = se_v ("SH: luma_offset_l0", currStream);
+      // scale by 2^(bitdepth-8); multiplication keeps negative offsets well defined
+      currSlice->wp_offset[0][i][0] = currSlice->wp_offset[0][i][0] * (1 << (p_Vid->bitdepth_luma - 8));
+    }
+    else
+    {
+      currSlice->wp_weight[0][i][0] = 1 << currSlice->luma_log2_weight_denom;
+      currSlice->wp_offset[0][i][0] = 0;
+    }
+
+    if (active_sps->chroma_format_idc != 0)
+    {
+      // one flag covers both chroma components (j = 1, 2)
+      chroma_weight_flag_l0 = u_1 ("SH: chroma_weight_flag_l0", currStream);
+
+      for (j=1; j<3; j++)
+      {
+        if (chroma_weight_flag_l0)
+        {
+          currSlice->wp_weight[0][i][j] = se_v("SH: chroma_weight_l0", currStream);
+          currSlice->wp_offset[0][i][j] = se_v("SH: chroma_offset_l0", currStream);
+          currSlice->wp_offset[0][i][j] = currSlice->wp_offset[0][i][j] * (1 << (p_Vid->bitdepth_chroma - 8));
+        }
+        else
+        {
+          currSlice->wp_weight[0][i][j] = 1<<currSlice->chroma_log2_weight_denom;
+          currSlice->wp_offset[0][i][j] = 0;
+        }
+      }
+    }
+  }
+  if ((p_Vid->type == B_SLICE) && p_Vid->active_pps->weighted_bipred_idc == 1)
+  {
+    for (i=0; i<currSlice->num_ref_idx_l1_active; i++)
+    {
+      luma_weight_flag_l1 = u_1("SH: luma_weight_flag_l1", currStream);
+
+      if (luma_weight_flag_l1)
+      {
+        currSlice->wp_weight[1][i][0] = se_v ("SH: luma_weight_l1", currStream);
+        currSlice->wp_offset[1][i][0] = se_v ("SH: luma_offset_l1", currStream);
+        currSlice->wp_offset[1][i][0] = currSlice->wp_offset[1][i][0] * (1 << (p_Vid->bitdepth_luma - 8));
+      }
+      else
+      {
+        currSlice->wp_weight[1][i][0] = 1<<currSlice->luma_log2_weight_denom;
+        currSlice->wp_offset[1][i][0] = 0;
+      }
+
+      if (active_sps->chroma_format_idc != 0)
+      {
+        chroma_weight_flag_l1 = u_1 ("SH: chroma_weight_flag_l1", currStream);
+
+        for (j=1; j<3; j++)
+        {
+          if (chroma_weight_flag_l1)
+          {
+            currSlice->wp_weight[1][i][j] = se_v("SH: chroma_weight_l1", currStream);
+            currSlice->wp_offset[1][i][j] = se_v("SH: chroma_offset_l1", currStream);
+            currSlice->wp_offset[1][i][j] = currSlice->wp_offset[1][i][j] * (1 << (p_Vid->bitdepth_chroma - 8));
+          }
+          else
+          {
+            currSlice->wp_weight[1][i][j] = 1<<currSlice->chroma_log2_weight_denom;
+            currSlice->wp_offset[1][i][j] = 0;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Read the decoded reference picture marking syntax (memory
+ *    management control operations) of the slice header.
+ *
+ * \param p_Vid
+ *    Decoder state. Any command list still attached to
+ *    p_Vid->dec_ref_pic_marking_buffer is freed first; the newly read
+ *    commands are then stored there as a singly linked list in
+ *    bitstream order.
+ * \param currStream
+ *    Bitstream positioned at the dec_ref_pic_marking() syntax.
+ *
+ * \note
+ *    An allocation failure is reported through error() instead of
+ *    dereferencing a NULL pointer. Commands are appended through a tail
+ *    pointer, so the list is not re-walked for every command.
+ ************************************************************************
+ */
+void dec_ref_pic_marking(VideoParameters *p_Vid, Bitstream *currStream)
+{
+  int val;
+
+  DecRefPicMarking_t *tmp_drpm;
+  DecRefPicMarking_t *tail = NULL;   // last command appended so far
+
+  // free old buffer content
+  while (p_Vid->dec_ref_pic_marking_buffer)
+  {
+    tmp_drpm=p_Vid->dec_ref_pic_marking_buffer;
+
+    p_Vid->dec_ref_pic_marking_buffer=tmp_drpm->Next;
+    free (tmp_drpm);
+  }
+
+  if (p_Vid->idr_flag)
+  {
+    p_Vid->no_output_of_prior_pics_flag = u_1("SH: no_output_of_prior_pics_flag", currStream);
+    p_Vid->long_term_reference_flag = u_1("SH: long_term_reference_flag", currStream);
+    return;
+  }
+
+  p_Vid->adaptive_ref_pic_buffering_flag = u_1("SH: adaptive_ref_pic_buffering_flag", currStream);
+  if (!p_Vid->adaptive_ref_pic_buffering_flag)
+    return;
+
+  // read Memory Management Control Operation list; it ends with operation 0,
+  // which is itself stored as the last list element
+  do
+  {
+    tmp_drpm=(DecRefPicMarking_t*)calloc (1,sizeof (DecRefPicMarking_t));
+    if (tmp_drpm == NULL)
+    {
+      error("dec_ref_pic_marking: could not allocate memory for MMCO command", 100);
+      return;
+    }
+    tmp_drpm->Next=NULL;
+
+    val = tmp_drpm->memory_management_control_operation = ue_v("SH: memory_management_control_operation", currStream);
+
+    if ((val==1)||(val==3))
+    {
+      tmp_drpm->difference_of_pic_nums_minus1 = ue_v("SH: difference_of_pic_nums_minus1", currStream);
+    }
+    if (val==2)
+    {
+      tmp_drpm->long_term_pic_num = ue_v("SH: long_term_pic_num", currStream);
+    }
+
+    if ((val==3)||(val==6))
+    {
+      tmp_drpm->long_term_frame_idx = ue_v("SH: long_term_frame_idx", currStream);
+    }
+    if (val==4)
+    {
+      tmp_drpm->max_long_term_frame_idx_plus1 = ue_v("SH: max_long_term_pic_idx_plus1", currStream);
+    }
+
+    // add command at the end of the list (the list is empty on entry to this loop)
+    if (tail == NULL)
+    {
+      p_Vid->dec_ref_pic_marking_buffer=tmp_drpm;
+    }
+    else
+    {
+      tail->Next=tmp_drpm;
+    }
+    tail = tmp_drpm;
+  }
+  while (val != 0);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * To calculate the poc values
+ * based upon JVT-F100d2
+ * POC200301: Until Jan 2003, this function will calculate the correct POC
+ * values, but the management of POCs in buffered pictures may need more work.
+ *
+ * Depending on active_sps->pic_order_cnt_type (0, 1 or 2) the function
+ * derives p_Vid->toppoc, p_Vid->bottompoc, p_Vid->framepoc and
+ * p_Vid->ThisPOC from the slice header values already stored in p_Vid,
+ * and updates the "previous picture" state (PrevPicOrderCntMsb/Lsb,
+ * PreviousFrameNum, PreviousFrameNumOffset) used for the next picture.
+ *
+ * \param p_Vid
+ * decoder state; read and modified in place
+ * \return
+ * none
+ ************************************************************************
+ */
+void decode_poc(VideoParameters *p_Vid)
+{
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+ int i;
+ // for POC mode 0:
+ // MaxPicOrderCntLsb = 2^(log2_max_pic_order_cnt_lsb_minus4 + 4)
+ unsigned int MaxPicOrderCntLsb = (1<<(active_sps->log2_max_pic_order_cnt_lsb_minus4+4));
+
+ switch ( active_sps->pic_order_cnt_type )
+ {
+ case 0: // POC MODE 0
+ // 1st
+ // choose the reference values PrevPicOrderCntMsb / PrevPicOrderCntLsb:
+ // both are reset for an IDR picture; after a picture with MMCO 5 the Msb is
+ // reset and the Lsb becomes 0 (bottom field) or the stored toppoc (otherwise)
+ if(p_Vid->idr_flag)
+ {
+ p_Vid->PrevPicOrderCntMsb = 0;
+ p_Vid->PrevPicOrderCntLsb = 0;
+ }
+ else
+ {
+ if (p_Vid->last_has_mmco_5)
+ {
+ if (p_Vid->last_pic_bottom_field)
+ {
+ p_Vid->PrevPicOrderCntMsb = 0;
+ p_Vid->PrevPicOrderCntLsb = 0;
+ }
+ else
+ {
+ p_Vid->PrevPicOrderCntMsb = 0;
+ p_Vid->PrevPicOrderCntLsb = p_Vid->toppoc;
+ }
+ }
+ }
+ // Calculate the MSBs of current picture
+ // a jump of at least half the LSB range downwards is treated as a wrap-around
+ // forwards (Msb + Max), a jump of more than half upwards as a wrap backwards
+ if( p_Vid->pic_order_cnt_lsb < p_Vid->PrevPicOrderCntLsb &&
+ ( p_Vid->PrevPicOrderCntLsb - p_Vid->pic_order_cnt_lsb ) >= ( MaxPicOrderCntLsb / 2 ) )
+ p_Vid->PicOrderCntMsb = p_Vid->PrevPicOrderCntMsb + MaxPicOrderCntLsb;
+ else if ( p_Vid->pic_order_cnt_lsb > p_Vid->PrevPicOrderCntLsb &&
+ ( p_Vid->pic_order_cnt_lsb - p_Vid->PrevPicOrderCntLsb ) > ( MaxPicOrderCntLsb / 2 ) )
+ p_Vid->PicOrderCntMsb = p_Vid->PrevPicOrderCntMsb - MaxPicOrderCntLsb;
+ else
+ p_Vid->PicOrderCntMsb = p_Vid->PrevPicOrderCntMsb;
+
+ // 2nd
+ // combine Msb and Lsb into the field/frame POC values
+
+ if(p_Vid->field_pic_flag==0)
+ { //frame pix
+ p_Vid->toppoc = p_Vid->PicOrderCntMsb + p_Vid->pic_order_cnt_lsb;
+ p_Vid->bottompoc = p_Vid->toppoc + p_Vid->delta_pic_order_cnt_bottom;
+ // frame POC is the smaller of the two field POCs
+ p_Vid->ThisPOC = p_Vid->framepoc = (p_Vid->toppoc < p_Vid->bottompoc)? p_Vid->toppoc : p_Vid->bottompoc; // POC200301
+ }
+ else if (p_Vid->bottom_field_flag == FALSE)
+ { //top field
+ p_Vid->ThisPOC= p_Vid->toppoc = p_Vid->PicOrderCntMsb + p_Vid->pic_order_cnt_lsb;
+ }
+ else
+ { //bottom field
+ p_Vid->ThisPOC= p_Vid->bottompoc = p_Vid->PicOrderCntMsb + p_Vid->pic_order_cnt_lsb;
+ }
+ p_Vid->framepoc=p_Vid->ThisPOC;
+
+ if ( p_Vid->frame_num!=p_Vid->PreviousFrameNum)
+ p_Vid->PreviousFrameNum=p_Vid->frame_num;
+
+ // only reference pictures (nal_reference_idc != 0) update the Prev* values
+ if(p_Vid->nal_reference_idc)
+ {
+ p_Vid->PrevPicOrderCntLsb = p_Vid->pic_order_cnt_lsb;
+ p_Vid->PrevPicOrderCntMsb = p_Vid->PicOrderCntMsb;
+ }
+
+ break;
+
+ case 1: // POC MODE 1
+ // 1st
+ // derive FrameNumOffset: 0 for IDR, otherwise the previous offset, increased
+ // by MaxFrameNum when frame_num is smaller than the previous frame_num
+ if(p_Vid->idr_flag)
+ {
+ p_Vid->FrameNumOffset=0; // first pix of IDRGOP,
+ p_Vid->delta_pic_order_cnt[0]=0; //ignore first delta
+ if(p_Vid->frame_num)
+ error("frame_num not equal to zero in IDR picture", -1020);
+ }
+ else
+ {
+ if (p_Vid->last_has_mmco_5)
+ {
+ p_Vid->PreviousFrameNumOffset = 0;
+ p_Vid->PreviousFrameNum = 0;
+ }
+ if (p_Vid->frame_num<p_Vid->PreviousFrameNum)
+ { //not first pix of IDRGOP
+ p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset + p_Vid->MaxFrameNum;
+ }
+ else
+ {
+ p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset;
+ }
+ }
+
+ // 2nd
+ // AbsFrameNum stays 0 when the SPS defines no POC cycle; it is decremented
+ // for non-reference pictures
+ if(active_sps->num_ref_frames_in_pic_order_cnt_cycle)
+ p_Vid->AbsFrameNum = p_Vid->FrameNumOffset+p_Vid->frame_num;
+ else
+ p_Vid->AbsFrameNum=0;
+ if( (!p_Vid->nal_reference_idc) && p_Vid->AbsFrameNum > 0)
+ p_Vid->AbsFrameNum--;
+
+ // 3rd
+ // sum of all per-frame offsets of one POC cycle
+ p_Vid->ExpectedDeltaPerPicOrderCntCycle=0;
+
+ if(active_sps->num_ref_frames_in_pic_order_cnt_cycle)
+ for(i=0;i<(int) active_sps->num_ref_frames_in_pic_order_cnt_cycle;i++)
+ p_Vid->ExpectedDeltaPerPicOrderCntCycle += active_sps->offset_for_ref_frame[i];
+
+ // AbsFrameNum can only be non-zero here when the cycle length is non-zero
+ // (see step 2), so the division and modulo below have a non-zero divisor
+ if(p_Vid->AbsFrameNum)
+ {
+ p_Vid->PicOrderCntCycleCnt = (p_Vid->AbsFrameNum-1)/active_sps->num_ref_frames_in_pic_order_cnt_cycle;
+ p_Vid->FrameNumInPicOrderCntCycle = (p_Vid->AbsFrameNum-1)%active_sps->num_ref_frames_in_pic_order_cnt_cycle;
+ p_Vid->ExpectedPicOrderCnt = p_Vid->PicOrderCntCycleCnt*p_Vid->ExpectedDeltaPerPicOrderCntCycle;
+ for(i=0;i<=(int)p_Vid->FrameNumInPicOrderCntCycle;i++)
+ p_Vid->ExpectedPicOrderCnt += active_sps->offset_for_ref_frame[i];
+ }
+ else
+ p_Vid->ExpectedPicOrderCnt=0;
+
+ if(!p_Vid->nal_reference_idc)
+ p_Vid->ExpectedPicOrderCnt += active_sps->offset_for_non_ref_pic;
+
+ // add the slice-header deltas to the expected POC
+ if(p_Vid->field_pic_flag==0)
+ { //frame pix
+ p_Vid->toppoc = p_Vid->ExpectedPicOrderCnt + p_Vid->delta_pic_order_cnt[0];
+ p_Vid->bottompoc = p_Vid->toppoc + active_sps->offset_for_top_to_bottom_field + p_Vid->delta_pic_order_cnt[1];
+ p_Vid->ThisPOC = p_Vid->framepoc = (p_Vid->toppoc < p_Vid->bottompoc)? p_Vid->toppoc : p_Vid->bottompoc; // POC200301
+ }
+ else if (p_Vid->bottom_field_flag == FALSE)
+ { //top field
+ p_Vid->ThisPOC = p_Vid->toppoc = p_Vid->ExpectedPicOrderCnt + p_Vid->delta_pic_order_cnt[0];
+ }
+ else
+ { //bottom field
+ p_Vid->ThisPOC = p_Vid->bottompoc = p_Vid->ExpectedPicOrderCnt + active_sps->offset_for_top_to_bottom_field + p_Vid->delta_pic_order_cnt[0];
+ }
+ p_Vid->framepoc=p_Vid->ThisPOC;
+
+ // remember frame_num state for the next picture
+ p_Vid->PreviousFrameNum=p_Vid->frame_num;
+ p_Vid->PreviousFrameNumOffset=p_Vid->FrameNumOffset;
+
+ break;
+
+
+ case 2: // POC MODE 2
+ // POC follows directly from the frame number: 2*AbsFrameNum for reference
+ // pictures and 2*AbsFrameNum-1 for non-reference pictures
+ if(p_Vid->idr_flag) // IDR picture
+ {
+ p_Vid->FrameNumOffset=0; // first pix of IDRGOP,
+ p_Vid->ThisPOC = p_Vid->framepoc = p_Vid->toppoc = p_Vid->bottompoc = 0;
+ if(p_Vid->frame_num)
+ error("frame_num not equal to zero in IDR picture", -1020);
+ }
+ else
+ {
+ if (p_Vid->last_has_mmco_5)
+ {
+ p_Vid->PreviousFrameNum = 0;
+ p_Vid->PreviousFrameNumOffset = 0;
+ }
+ if (p_Vid->frame_num<p_Vid->PreviousFrameNum)
+ p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset + p_Vid->MaxFrameNum;
+ else
+ p_Vid->FrameNumOffset = p_Vid->PreviousFrameNumOffset;
+
+
+ p_Vid->AbsFrameNum = p_Vid->FrameNumOffset+p_Vid->frame_num;
+ if(!p_Vid->nal_reference_idc)
+ p_Vid->ThisPOC = (2*p_Vid->AbsFrameNum - 1);
+ else
+ p_Vid->ThisPOC = (2*p_Vid->AbsFrameNum);
+
+ // for a field picture only the POC of the coded parity is updated
+ if (p_Vid->field_pic_flag==0)
+ p_Vid->toppoc = p_Vid->bottompoc = p_Vid->framepoc = p_Vid->ThisPOC;
+ else if (p_Vid->bottom_field_flag == FALSE)
+ p_Vid->toppoc = p_Vid->framepoc = p_Vid->ThisPOC;
+ else p_Vid->bottompoc = p_Vid->framepoc = p_Vid->ThisPOC;
+ }
+
+ p_Vid->PreviousFrameNum=p_Vid->frame_num;
+ p_Vid->PreviousFrameNumOffset=p_Vid->FrameNumOffset;
+ break;
+
+
+ default:
+ //error must occurs
+ // any other pic_order_cnt_type is unsupported: this assert always fails
+ assert( 1==0 );
+ break;
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Debugging aid: print the POC related values of the decoder state,
+ *    the active SPS and the current slice header to stdout.
+ * \param p_Vid
+ *    decoder state to dump (not modified)
+ * \return
+ *    always 0
+ ************************************************************************
+ */
+int dumppoc(VideoParameters *p_Vid)
+{
+  seq_parameter_set_rbsp_t *sps = p_Vid->active_sps;
+
+  /* values derived for the current picture */
+  printf ("\nPOC locals...\n");
+  printf ("toppoc %d\n", (int) p_Vid->toppoc);
+  printf ("bottompoc %d\n", (int) p_Vid->bottompoc);
+  printf ("frame_num %d\n", (int) p_Vid->frame_num);
+  printf ("field_pic_flag %d\n", (int) p_Vid->field_pic_flag);
+  printf ("bottom_field_flag %d\n", (int) p_Vid->bottom_field_flag);
+
+  /* sequence parameter set fields */
+  printf ("POC SPS\n");
+  printf ("log2_max_frame_num_minus4 %d\n", (int) sps->log2_max_frame_num_minus4); // POC200301
+  printf ("log2_max_pic_order_cnt_lsb_minus4 %d\n", (int) sps->log2_max_pic_order_cnt_lsb_minus4);
+  printf ("pic_order_cnt_type %d\n", (int) sps->pic_order_cnt_type);
+  printf ("num_ref_frames_in_pic_order_cnt_cycle %d\n", (int) sps->num_ref_frames_in_pic_order_cnt_cycle);
+  printf ("delta_pic_order_always_zero_flag %d\n", (int) sps->delta_pic_order_always_zero_flag);
+  printf ("offset_for_non_ref_pic %d\n", (int) sps->offset_for_non_ref_pic);
+  printf ("offset_for_top_to_bottom_field %d\n", (int) sps->offset_for_top_to_bottom_field);
+  printf ("offset_for_ref_frame[0] %d\n", (int) sps->offset_for_ref_frame[0]);
+  printf ("offset_for_ref_frame[1] %d\n", (int) sps->offset_for_ref_frame[1]);
+
+  /* picture parameter set / slice header fields */
+  printf ("POC in SLice Header\n");
+  printf ("bottom_field_pic_order_in_frame_present_flag %d\n", (int) p_Vid->active_pps->bottom_field_pic_order_in_frame_present_flag);
+  printf ("delta_pic_order_cnt[0] %d\n", (int) p_Vid->delta_pic_order_cnt[0]);
+  printf ("delta_pic_order_cnt[1] %d\n", (int) p_Vid->delta_pic_order_cnt[1]);
+  printf ("delta_pic_order_cnt[2] %d\n", (int) p_Vid->delta_pic_order_cnt[2]);
+  printf ("idr_flag %d\n", (int) p_Vid->idr_flag);
+  printf ("MaxFrameNum %d\n", (int) p_Vid->MaxFrameNum);
+
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Return the POC of the current picture as per (8-1) JVT-F100d2:
+ *    framepoc for a frame, toppoc for a top field and bottompoc for a
+ *    bottom field.
+ *    POC200301
+ * \param p_Vid
+ *    decoder state holding the POC values and field flags
+ ************************************************************************
+ */
+int picture_order(VideoParameters *p_Vid)
+{
+  if (p_Vid->field_pic_flag != 0)
+  {
+    // field picture: select by parity
+    return (p_Vid->bottom_field_flag == FALSE) ? p_Vid->toppoc : p_Vid->bottompoc;
+  }
+
+  // frame picture
+  return p_Vid->framepoc;
+}
+
diff --git a/Src/h264dec/ldecod/src/image.c b/Src/h264dec/ldecod/src/image.c
new file mode 100644
index 00000000..a75ff7ea
--- /dev/null
+++ b/Src/h264dec/ldecod/src/image.c
@@ -0,0 +1,1699 @@
+
+/*!
+ ***********************************************************************
+ * \file image.c
+ *
+ * \brief
+ * Decode a Slice
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Inge Lille-Langoy <inge.lille-langoy@telenor.com>
+ * - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se>
+ * - Jani Lainema <jani.lainema@nokia.com>
+ * - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de>
+ * - Byeong-Moon Jeon <jeonbm@lge.com>
+ * - Thomas Wedi <wedi@tnt.uni-hannover.de>
+ * - Gabi Blaettermann
+ * - Ye-Kui Wang <wyk@ieee.org>
+ * - Antti Hallapuro <antti.hallapuro@nokia.com>
+ * - Alexis Tourapis <alexismt@ieee.org>
+ * - Jill Boyce <jill.boyce@thomson.net>
+ * - Saurav K Bandyopadhyay <saurav@ieee.org>
+ * - Zhenyu Wu <Zhenyu.Wu@thomson.net>
+ * - Purvin Pandit <Purvin.Pandit@thomson.net>
+ *
+ ***********************************************************************
+ */
+
+#include "contributors.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "global.h"
+#include "image.h"
+#include "fmo.h"
+#include "nalu.h"
+#include "parset.h"
+#include "header.h"
+
+#include "sei.h"
+#include "output.h"
+#include "mb_access.h"
+#include "memalloc.h"
+#include "macroblock.h"
+
+#include "loopfilter.h"
+
+#include "biaridecod.h"
+#include "context_ini.h"
+#include "cabac.h"
+#include "vlc.h"
+#include "quant.h"
+
+#include "errorconcealment.h"
+#include "erc_api.h"
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Detect the byte order of the host by inspecting the first byte of
+ *    a short that holds the value 1.
+ * \return
+ *    0, little-endian (e.g. Intel architectures)
+ *    1, big-endian (e.g. SPARC, MIPS, PowerPC)
+ ************************************************************************
+ */
+int testEndian(void)
+{
+  short probe = 1;
+  byte *first = (byte*) &probe;
+
+  // on a little-endian host the low-order byte (value 1) is stored first
+  return (*first == 0);
+}
+
+static int read_new_slice(Slice *currSlice, uint64_t time_code);
+/*!
+ ************************************************************************
+ * \brief
+ * Initializes the parameters for a new picture
+ *
+ * Finishes a still pending picture, handles gaps in frame_num, computes
+ * the POC, allocates p_Vid->dec_picture and copies the slice / parameter
+ * set properties into it, resets the error concealment state and the
+ * per-macroblock bookkeeping, and selects the MBAFF or non-MBAFF
+ * neighbour access functions.
+ *
+ * \param p_Vid
+ * decoder state; modified in place
+ * \param currSlice
+ * current slice; only slice_qp_delta is read from it here
+ * \param p_Inp
+ * input parameters; not referenced in this function body
+ * \param time_code
+ * user supplied value stored in the new picture
+ ************************************************************************
+ */
+// benski> time_code is user-passed value
+static void init_picture(VideoParameters *p_Vid, Slice *currSlice, InputParameters *p_Inp, uint64_t time_code)
+{
+ int i;
+ int nplane;
+
+ if (p_Vid->dec_picture)
+ {
+ // this may only happen on slice loss
+ exit_picture(p_Vid, &p_Vid->dec_picture);
+ }
+ // frame_num at which recovery completes, modulo MaxFrameNum
+ if (p_Vid->recovery_point)
+ p_Vid->recovery_frame_num = (p_Vid->frame_num + p_Vid->recovery_frame_cnt) % p_Vid->MaxFrameNum;
+
+ if (p_Vid->idr_flag)
+ p_Vid->recovery_frame_num = p_Vid->frame_num;
+
+ // detect a gap: frame_num is neither the previous value nor its successor
+ // (pre_frame_num == INT_MIN is excluded from the check)
+ if (p_Vid->recovery_point == 0 &&
+ p_Vid->pre_frame_num != INT_MIN &&
+ p_Vid->frame_num != p_Vid->pre_frame_num &&
+ p_Vid->frame_num != (p_Vid->pre_frame_num + 1) % p_Vid->MaxFrameNum)
+ {
+ if (p_Vid->active_sps->gaps_in_frame_num_value_allowed_flag == 0)
+ {
+ /* Advanced Error Concealment would be called here to combat unintentional loss of pictures. */
+ error("An unintentional loss of pictures occurs! Exit\n", 100);
+
+ }
+ fill_frame_num_gap(p_Vid);
+ }
+
+ if(p_Vid->nal_reference_idc)
+ {
+ p_Vid->pre_frame_num = p_Vid->frame_num;
+ }
+
+ p_Vid->num_dec_mb = 0;
+
+ //calculate POC
+ decode_poc(p_Vid);
+
+ // 0x7fffffff is used as the "recovery_poc not set yet" value in this check
+ if (p_Vid->recovery_frame_num == p_Vid->frame_num &&
+ p_Vid->recovery_poc == 0x7fffffff)
+ p_Vid->recovery_poc = p_Vid->framepoc;
+
+ if(p_Vid->nal_reference_idc)
+ p_Vid->last_ref_pic_poc = p_Vid->framepoc;
+
+ // dumppoc (p_Vid);
+
+ // NOTE(review): the result of alloc_storable_picture() is used without a NULL check here - confirm it cannot return NULL
+ p_Vid->dec_picture = alloc_storable_picture (p_Vid, (PictureStructure) p_Vid->structure, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr);
+ p_Vid->dec_picture->time_code = time_code;
+ p_Vid->dec_picture->top_poc=p_Vid->toppoc;
+ p_Vid->dec_picture->bottom_poc=p_Vid->bottompoc;
+ p_Vid->dec_picture->frame_poc=p_Vid->framepoc;
+ p_Vid->dec_picture->qp = p_Vid->qp;
+ p_Vid->dec_picture->slice_qp_delta = currSlice->slice_qp_delta;
+ p_Vid->dec_picture->chroma_qp_offset[0] = p_Vid->active_pps->chroma_qp_index_offset;
+ p_Vid->dec_picture->chroma_qp_offset[1] = p_Vid->active_pps->second_chroma_qp_index_offset;
+
+ // reset all variables of the error concealment instance before decoding of every frame.
+ // here the third parameter should, if perfectly, be equal to the number of slices per frame.
+ // using little value is ok, the code will allocate more memory if the slice number is larger
+ ercReset(p_Vid->erc_errorVar, p_Vid->PicSizeInMbs, p_Vid->PicSizeInMbs, p_Vid->dec_picture->size_x);
+ p_Vid->erc_mvperMB = 0;
+
+ // pick the POC matching the picture structure; for fields p_Vid->number is
+ // doubled (top) or doubled plus one (bottom)
+ switch (p_Vid->structure )
+ {
+ case TOP_FIELD:
+ {
+ p_Vid->dec_picture->poc=p_Vid->toppoc;
+ p_Vid->number *= 2;
+ break;
+ }
+ case BOTTOM_FIELD:
+ {
+ p_Vid->dec_picture->poc=p_Vid->bottompoc;
+ p_Vid->number = p_Vid->number * 2 + 1;
+ break;
+ }
+ case FRAME:
+ {
+ p_Vid->dec_picture->poc=p_Vid->framepoc;
+ break;
+ }
+ default:
+ error("p_Vid->structure not initialized", 235);
+ }
+
+ p_Vid->current_slice_nr=0;
+
+ // a slice type value above SI_SLICE is flagged for concealment and replaced by P_SLICE
+ if (p_Vid->type > SI_SLICE)
+ {
+ set_ec_flag(p_Vid, SE_PTYPE);
+ p_Vid->type = P_SLICE; // concealed element
+ }
+
+ // CAVLC init
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ {
+ memset(p_Vid->nz_coeff[0], -1, p_Vid->PicSizeInMbs * 48 *sizeof(byte)); // 3 * 4 * 4
+ }
+
+ // with constrained intra prediction every macroblock starts out marked as intra
+ if(p_Vid->active_pps->constrained_intra_pred_flag)
+ {
+ for (i=0; i<(int)p_Vid->PicSizeInMbs; ++i)
+ {
+ p_Vid->intra_block[i] = 1;
+ }
+ }
+
+ // Set the slice_nr member of each MB to -1, to ensure correct when packet loss occurs
+ // TO set Macroblock Map (mark all MBs as 'have to be concealed')
+ if( IS_INDEPENDENT(p_Vid) )
+ {
+ for( nplane=0; nplane<MAX_PLANE; ++nplane )
+ {
+ for(i=0; i<(int)p_Vid->PicSizeInMbs; ++i)
+ {
+ p_Vid->mb_data_JV[nplane][i].slice_nr = -1;
+ p_Vid->mb_data_JV[nplane][i].ei_flag = 1;
+ p_Vid->mb_data_JV[nplane][i].dpl_flag = 0;
+ }
+ }
+ }
+ else
+ {
+ for(i=0; i<(int)p_Vid->PicSizeInMbs; ++i)
+ {
+ p_Vid->mb_data[i].slice_nr = -1;
+ p_Vid->mb_data[i].ei_flag = 1;
+ p_Vid->mb_data[i].dpl_flag = 0;
+ }
+ }
+
+ p_Vid->dec_picture->slice_type = p_Vid->type;
+ p_Vid->dec_picture->used_for_reference = (p_Vid->nal_reference_idc != 0);
+ p_Vid->dec_picture->idr_flag = p_Vid->idr_flag;
+ p_Vid->dec_picture->no_output_of_prior_pics_flag = p_Vid->no_output_of_prior_pics_flag;
+ p_Vid->dec_picture->long_term_reference_flag = p_Vid->long_term_reference_flag;
+ p_Vid->dec_picture->adaptive_ref_pic_buffering_flag = p_Vid->adaptive_ref_pic_buffering_flag;
+
+ // the MMCO command list pointer is handed over to the picture; p_Vid's pointer is cleared
+ p_Vid->dec_picture->dec_ref_pic_marking_buffer = p_Vid->dec_ref_pic_marking_buffer;
+ p_Vid->dec_ref_pic_marking_buffer = NULL;
+
+ p_Vid->dec_picture->mb_aff_frame_flag = p_Vid->mb_aff_frame_flag;
+ p_Vid->dec_picture->PicWidthInMbs = p_Vid->PicWidthInMbs;
+
+ // install the neighbour / position access functions for MBAFF or non-MBAFF pictures
+ if (p_Vid->dec_picture->mb_aff_frame_flag)
+ {
+ p_Vid->get_mb_block_pos = get_mb_block_pos_mbaff;
+ p_Vid->getNeighbour = getAffNeighbour;
+ p_Vid->getNeighbourXP_NoPos = getAffNeighbour;
+ p_Vid->getNeighbourPX_NoPos = getAffNeighbour;
+ p_Vid->getNeighbourLuma = getAffNeighbourLuma;
+ p_Vid->getNeighbourPXLuma = getAffNeighbourLuma;
+ p_Vid->getNeighbourXPLuma = getAffNeighbourXPLuma;
+ p_Vid->getNeighbourLeftLuma = getAffNeighbourN0Luma;
+ p_Vid->getNeighbourNXLuma = getAffNeighbourNXLuma;
+ p_Vid->getNeighbourLeft = getAffNeighbourN0;
+ p_Vid->getNeighbourUp = getAffNeighbour0N;
+ p_Vid->getNeighbourUpLuma = getAffNeighbour0NLuma;
+ p_Vid->getNeighbourNX = getAffNeighbourNX;
+ p_Vid->getNeighbourNP = getAffNeighbourNX;
+ p_Vid->getNeighbourNPChromaNB = getAffNeighbourNX;
+ p_Vid->getNeighbour0X = getAffNeighbour0X;
+ p_Vid->getNeighbour0XLuma = getAffNeighbour0XLuma;
+ p_Vid->getNeighbourX0 = getAffNeighbourX0;
+ p_Vid->getNeighbourNPLumaNB = getAffNeighbourNPLuma;
+ p_Vid->getNeighbourPXLumaNB = getAffNeighbourPXLumaNB;
+ p_Vid->getNeighbourPXLumaNB_NoPos = getAffNeighbourPXLumaNB_NoPos;
+ p_Vid->getNeighbourXPLumaNB = getAffNeighbourXPLuma;
+ p_Vid->getNeighbourPPLumaNB = getAffNeighbourPPLumaNB;
+ p_Vid->getNeighbourXPLumaNB_NoPos = getAffNeighbourXPLuma;
+ }
+ else
+ {
+ p_Vid->get_mb_block_pos = get_mb_block_pos_normal;
+ p_Vid->getNeighbour = getNonAffNeighbour;
+ p_Vid->getNeighbourXP_NoPos = getNonAffNeighbourXP_NoPos;
+ p_Vid->getNeighbourPX_NoPos = getNonAffNeighbourPX_NoPos;
+ p_Vid->getNeighbourLuma = getNonAffNeighbourLuma;
+ p_Vid->getNeighbourPXLuma = getNonAffNeighbourPXLuma;
+ p_Vid->getNeighbourXPLuma = getNonAffNeighbourXPLuma;
+ p_Vid->getNeighbourLeftLuma = getNonAffNeighbourN0Luma;
+ p_Vid->getNeighbourNXLuma = getNonAffNeighbourNXLuma;
+ p_Vid->getNeighbourLeft = getNonAffNeighbourN0;
+ p_Vid->getNeighbourUp = getNonAffNeighbour0N;
+ p_Vid->getNeighbourUpLuma = getNonAffNeighbour0NLuma;
+ p_Vid->getNeighbourNX = getNonAffNeighbourNX;
+ p_Vid->getNeighbourNP = getNonAffNeighbourNP;
+ p_Vid->getNeighbourNPChromaNB = getNonAffNeighbourNPChromaNB;
+ p_Vid->getNeighbour0X = getNonAffNeighbour0X;
+ p_Vid->getNeighbour0XLuma = getNonAffNeighbour0XLuma;
+ p_Vid->getNeighbourX0 = getNonAffNeighbourX0;
+ p_Vid->getNeighbourNPLumaNB = getNonAffNeighbourNPLumaNB;
+ p_Vid->getNeighbourPXLumaNB = getNonAffNeighbourPXLumaNB;
+ p_Vid->getNeighbourPXLumaNB_NoPos = getNonAffNeighbourPXLumaNB_NoPos;
+ p_Vid->getNeighbourXPLumaNB = getNonAffNeighbourXPLumaNB;
+ p_Vid->getNeighbourPPLumaNB = getNonAffNeighbourPPLumaNB;
+ p_Vid->getNeighbourXPLumaNB_NoPos = getNonAffNeighbourXPLumaNB_NoPos;
+ }
+
+ p_Vid->dec_picture->pic_num = p_Vid->frame_num;
+ p_Vid->dec_picture->frame_num = p_Vid->frame_num;
+
+ p_Vid->dec_picture->recovery_frame = (unsigned int) (p_Vid->frame_num == p_Vid->recovery_frame_num);
+
+ p_Vid->dec_picture->coded_frame = (p_Vid->structure==FRAME);
+
+ p_Vid->dec_picture->chroma_format_idc = p_Vid->active_sps->chroma_format_idc;
+
+ p_Vid->dec_picture->frame_mbs_only_flag = p_Vid->active_sps->frame_mbs_only_flag;
+ p_Vid->dec_picture->frame_cropping_flag = p_Vid->active_sps->frame_cropping_flag;
+
+ // cropping offsets are only copied when cropping is enabled in the SPS
+ if (p_Vid->dec_picture->frame_cropping_flag)
+ {
+ p_Vid->dec_picture->frame_cropping_rect_left_offset = p_Vid->active_sps->frame_cropping_rect_left_offset;
+ p_Vid->dec_picture->frame_cropping_rect_right_offset = p_Vid->active_sps->frame_cropping_rect_right_offset;
+ p_Vid->dec_picture->frame_cropping_rect_top_offset = p_Vid->active_sps->frame_cropping_rect_top_offset;
+ p_Vid->dec_picture->frame_cropping_rect_bottom_offset = p_Vid->active_sps->frame_cropping_rect_bottom_offset;
+ }
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+ // store the necessary tone mapping sei into StorablePicture structure
+ p_Vid->dec_picture->seiHasTone_mapping = 0;
+
+ if (p_Vid->seiToneMapping->seiHasTone_mapping)
+ {
+ p_Vid->dec_picture->seiHasTone_mapping = 1;
+ p_Vid->dec_picture->tone_mapping_model_id = p_Vid->seiToneMapping->model_id;
+ p_Vid->dec_picture->tonemapped_bit_depth = p_Vid->seiToneMapping->sei_bit_depth;
+ // NOTE(review): the buffer is sized with sizeof(int) but filled with sizeof(imgpel) elements below - confirm the intended element type
+ p_Vid->dec_picture->tone_mapping_lut = malloc(sizeof(int)*(1<<p_Vid->seiToneMapping->coded_data_bit_depth));
+ if (NULL == p_Vid->dec_picture->tone_mapping_lut)
+ {
+ no_mem_exit("init_picture: tone_mapping_lut");
+ }
+ memcpy(p_Vid->dec_picture->tone_mapping_lut, p_Vid->seiToneMapping->lut, sizeof(imgpel)*(1<<p_Vid->seiToneMapping->coded_data_bit_depth));
+ update_tone_mapping_sei(p_Vid->seiToneMapping);
+ }
+#endif
+
+ // independent colour planes: plane 0 reuses dec_picture, planes 1 and 2 get
+ // their own pictures initialised from plane 0 via copy_dec_picture_JV()
+ if( IS_INDEPENDENT(p_Vid) )
+ {
+ p_Vid->dec_picture_JV[0] = p_Vid->dec_picture;
+ p_Vid->dec_picture_JV[1] = alloc_storable_picture (p_Vid, (PictureStructure) p_Vid->structure, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr);
+ copy_dec_picture_JV( p_Vid, p_Vid->dec_picture_JV[1], p_Vid->dec_picture_JV[0] );
+ p_Vid->dec_picture_JV[2] = alloc_storable_picture (p_Vid, (PictureStructure) p_Vid->structure, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr);
+ copy_dec_picture_JV( p_Vid, p_Vid->dec_picture_JV[2], p_Vid->dec_picture_JV[0] );
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Post-process an MBAFF picture: for every macroblock pair flagged in
+ *    dec_picture->motion.mb_field, interleave the rows of the pair in
+ *    place, so that rows 0..N-1 of the pair end up on the even lines and
+ *    rows N..2N-1 on the odd lines (N = 16 for luma, mb_cr_size_y for
+ *    chroma).
+ *
+ * \param p_Vid
+ *    decoder state; p_Vid->dec_picture is modified in place
+ *
+ * \note
+ *    The macroblock index is an int. A short counter overflows for
+ *    pictures with more than 32767 macroblocks.
+ ************************************************************************
+ */
+void MbAffPostProc(VideoParameters *p_Vid)
+{
+  // scratch copy of one macroblock pair: up to 32 rows of 16 samples
+  imgpel temp[32][16];
+
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  imgpel ** imgY = dec_picture->imgY->img;
+  imgpel **imgUV[2] = {dec_picture->imgUV[0]->img, dec_picture->imgUV[1]->img};
+
+  int i, y, uv;
+  short x0, y0;   // stay short: their addresses are passed to get_mb_pos()
+
+  // step 2: only the first macroblock of each pair is inspected
+  for (i=0; i<(int)dec_picture->PicSizeInMbs; i+=2)
+  {
+    if (dec_picture->motion.mb_field[i])
+    {
+      get_mb_pos(p_Vid, i, p_Vid->mb_size[IS_LUMA], &x0, &y0);
+      for (y=0; y<(2*MB_BLOCK_SIZE);++y)
+        memcpy(temp[y], &imgY[y0+y][x0], MB_BLOCK_SIZE * sizeof(imgpel));
+
+      for (y=0; y<MB_BLOCK_SIZE; ++y)
+      {
+        memcpy(&imgY[y0+(2*y  )][x0], temp[y              ], MB_BLOCK_SIZE * sizeof(imgpel));
+        memcpy(&imgY[y0+(2*y+1)][x0], temp[y+MB_BLOCK_SIZE], MB_BLOCK_SIZE * sizeof(imgpel));
+      }
+
+
+      if (dec_picture->chroma_format_idc != YUV400)
+      {
+        // convert the luma position into chroma sample coordinates
+        x0 = (short) (x0 / (16/p_Vid->mb_cr_size_x));
+        y0 = (short) (y0 / (16/p_Vid->mb_cr_size_y));
+
+        for (uv=0; uv<2; ++uv)
+        {
+          for (y=0; y<(2*p_Vid->mb_cr_size_y);++y)
+            memcpy(temp[y], &imgUV[uv][y0+y][x0], p_Vid->mb_cr_size_x * sizeof(imgpel));
+
+          for (y=0; y<p_Vid->mb_cr_size_y;++y)
+          {
+            memcpy(&imgUV[uv][y0+(2*y  )][x0], temp[y                      ], p_Vid->mb_cr_size_x * sizeof(imgpel));
+            memcpy(&imgUV[uv][y0+(2*y+1)][x0], temp[y+p_Vid->mb_cr_size_y], p_Vid->mb_cr_size_x * sizeof(imgpel));
+          }
+        }
+      }
+    }
+  }
+}
+
+// Fill the bi-predictive weight table currSlice->wbp_weight for every pair
+// (i, j) of list 0 / list 1 reference indices.
+// - weighted_bipred_idc == 1: the weights are copied from the explicit
+//   per-reference weights in currSlice->wp_weight.
+// - weighted_bipred_idc == 2: the weights are derived from the POC distances
+//   between the current picture and the two references; the denominators,
+//   rounding values and offsets are first reset to fixed defaults.
+// When currSlice->mb_aff_frame_flag is set, the tables with list indices 2..5
+// are filled as well, using the stored toppoc / bottompoc.
+static void fill_wp_params(Slice *currSlice)
+{
+ VideoParameters *p_Vid = currSlice->p_Vid;
+
+ int i, j, k;
+ int comp;
+ int log_weight_denom;
+ int tb, td;
+ int tx,DistScaleFactor;
+
+ int max_l0_ref = currSlice->num_ref_idx_l0_active;
+ int max_l1_ref = currSlice->num_ref_idx_l1_active;
+
+ // implicit mode: fixed denominator 5 (weights sum to 64), rounding 16, zero offsets
+ if (p_Vid->active_pps->weighted_bipred_idc == 2)
+ {
+ currSlice->luma_log2_weight_denom = 5;
+ currSlice->chroma_log2_weight_denom = 5;
+ currSlice->wp_round_luma = 16;
+ currSlice->wp_round_chroma = 16;
+
+ for (i=0; i<MAX_REFERENCE_PICTURES; ++i)
+ {
+ for (comp=0; comp<3; ++comp)
+ {
+ log_weight_denom = (comp == 0) ? currSlice->luma_log2_weight_denom : currSlice->chroma_log2_weight_denom;
+ currSlice->wp_weight[0][i][comp] = 1<<log_weight_denom;
+ currSlice->wp_weight[1][i][comp] = 1<<log_weight_denom;
+ currSlice->wp_offset[0][i][comp] = 0;
+ currSlice->wp_offset[1][i][comp] = 0;
+ }
+ }
+ }
+
+
+ // frame (or non-MBAFF) weights: lists 0 and 1
+ for (i=0; i<max_l0_ref; ++i)
+ {
+ for (j=0; j<max_l1_ref; ++j)
+ {
+ for (comp = 0; comp<3; ++comp)
+ {
+ // note: log_weight_denom is assigned here but not read again inside this loop
+ log_weight_denom = (comp == 0) ? currSlice->luma_log2_weight_denom : currSlice->chroma_log2_weight_denom;
+ if (p_Vid->active_pps->weighted_bipred_idc == 1)
+ {
+ currSlice->wbp_weight[0][i][j][comp] = currSlice->wp_weight[0][i][comp];
+ currSlice->wbp_weight[1][i][j][comp] = currSlice->wp_weight[1][j][comp];
+ }
+ else if (p_Vid->active_pps->weighted_bipred_idc == 2)
+ {
+ // td: clipped POC distance between the list 1 and the list 0 reference
+ td = iClip3(-128,127,p_Vid->listX[LIST_1][j]->poc - p_Vid->listX[LIST_0][i]->poc);
+ // equal POCs or a long-term reference: use equal weights 32/32
+ if (td == 0 || p_Vid->listX[LIST_1][j]->is_long_term || p_Vid->listX[LIST_0][i]->is_long_term)
+ {
+ currSlice->wbp_weight[0][i][j][comp] = 32;
+ currSlice->wbp_weight[1][i][j][comp] = 32;
+ }
+ else
+ {
+ // tb: clipped POC distance between the current picture and the list 0 reference
+ tb = iClip3(-128,127,p_Vid->ThisPOC - p_Vid->listX[LIST_0][i]->poc);
+
+ tx = (16384 + iabs(td/2))/td;
+ DistScaleFactor = iClip3(-1024, 1023, (tx*tb + 32 )>>6);
+
+ currSlice->wbp_weight[1][i][j][comp] = DistScaleFactor >> 2;
+ currSlice->wbp_weight[0][i][j][comp] = 64 - currSlice->wbp_weight[1][i][j][comp];
+ // weights outside [-64, 128] fall back to 32/32 with zero offsets
+ if (currSlice->wbp_weight[1][i][j][comp] < -64 || currSlice->wbp_weight[1][i][j][comp] > 128)
+ {
+ currSlice->wbp_weight[0][i][j][comp] = 32;
+ currSlice->wbp_weight[1][i][j][comp] = 32;
+ currSlice->wp_offset[0][i][comp] = 0;
+ currSlice->wp_offset[1][j][comp] = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+
+
+ // MBAFF: additionally fill the tables for list indices 2/3 (k == 2) and
+ // 4/5 (k == 4); these lists hold twice as many entries, and entry i takes
+ // its explicit weight / offset from frame reference i>>1
+ if (currSlice->mb_aff_frame_flag)
+ {
+ for (i=0; i<2*max_l0_ref; ++i)
+ {
+ for (j=0; j<2*max_l1_ref; ++j)
+ {
+ for (comp = 0; comp<3; ++comp)
+ {
+ for (k=2; k<6; k+=2)
+ {
+ currSlice->wp_offset[k+0][i][comp] = currSlice->wp_offset[0][i>>1][comp];
+ currSlice->wp_offset[k+1][j][comp] = currSlice->wp_offset[1][j>>1][comp];
+
+ log_weight_denom = (comp == 0) ? currSlice->luma_log2_weight_denom : currSlice->chroma_log2_weight_denom;
+ if (p_Vid->active_pps->weighted_bipred_idc == 1)
+ {
+ currSlice->wbp_weight[k+0][i][j][comp] = currSlice->wp_weight[0][i>>1][comp];
+ currSlice->wbp_weight[k+1][i][j][comp] = currSlice->wp_weight[1][j>>1][comp];
+ }
+ else if (p_Vid->active_pps->weighted_bipred_idc == 2)
+ {
+ td = iClip3(-128,127,p_Vid->listX[k+LIST_1][j]->poc - p_Vid->listX[k+LIST_0][i]->poc);
+ if (td == 0 || p_Vid->listX[k+LIST_1][j]->is_long_term || p_Vid->listX[k+LIST_0][i]->is_long_term)
+ {
+ currSlice->wbp_weight[k+0][i][j][comp] = 32;
+ currSlice->wbp_weight[k+1][i][j][comp] = 32;
+ }
+ else
+ {
+ // the current POC is toppoc for k == 2 and bottompoc for k == 4
+ tb = iClip3(-128,127,((k==2)?p_Vid->toppoc:p_Vid->bottompoc) - p_Vid->listX[k+LIST_0][i]->poc);
+
+ tx = (16384 + iabs(td/2))/td;
+ DistScaleFactor = iClip3(-1024, 1023, (tx*tb + 32 )>>6);
+
+ currSlice->wbp_weight[k+1][i][j][comp] = DistScaleFactor >> 2;
+ currSlice->wbp_weight[k+0][i][j][comp] = 64 - currSlice->wbp_weight[k+1][i][j][comp];
+ if (currSlice->wbp_weight[k+1][i][j][comp] < -64 || currSlice->wbp_weight[k+1][i][j][comp] > 128)
+ {
+ currSlice->wbp_weight[k+1][i][j][comp] = 32;
+ currSlice->wbp_weight[k+0][i][j][comp] = 32;
+ currSlice->wp_offset[k+0][i][comp] = 0;
+ currSlice->wp_offset[k+1][j][comp] = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Set up entropy decoding and weighted prediction for a slice whose
+ *    header has already been read, then decode its slice data.
+ * \param currSlice
+ *    slice to decode
+ * \param current_header
+ *    header code obtained for this slice; slice data is decoded only
+ *    when it is SOP or SOS
+ ************************************************************************
+ */
+static void decode_slice(Slice *currSlice, int current_header)
+{
+  VideoParameters *vid = currSlice->p_Vid;
+  int wp_needed = 0;
+
+  // entropy_coding_mode_flag selects the CABAC path, which is re-initialised per slice
+  if (vid->active_pps->entropy_coding_mode_flag)
+  {
+    init_contexts(currSlice);
+    cabac_new_slice(currSlice);
+  }
+
+  // weighted prediction tables are needed for B slices with weighted_bipred_idc > 0
+  // and for every non-I slice when weighted_pred_flag is set
+  if (currSlice->slice_type == B_SLICE && vid->active_pps->weighted_bipred_idc > 0)
+    wp_needed = 1;
+  else if (currSlice->slice_type != I_SLICE && vid->active_pps->weighted_pred_flag)
+    wp_needed = 1;
+
+  if (wp_needed)
+  {
+    fill_wp_params(currSlice);
+  }
+
+  // decode main slice information
+  if (current_header == SOS || current_header == SOP)
+  {
+    decode_one_slice(currSlice);
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Error tracking for primary and redundant slices.
+ *    A primary slice (redundant_pic_cnt == 0) starts out as correct and
+ *    is marked incorrect when any of its list-0 references is flagged
+ *    incorrect in ref_flag[]. A redundant slice is marked incorrect when
+ *    the single reference it uses (redundant_slice_ref_idx) is flagged
+ *    incorrect. I slices are never downgraded here.
+ * \param p_Vid
+ *    decoder state holding ref_flag[], Is_primary_correct and
+ *    Is_redundant_correct
+ ************************************************************************
+ */
+static void Error_tracking(VideoParameters *p_Vid)
+{
+  Slice *slice = p_Vid->currentSlice;
+  const int primary = (p_Vid->redundant_pic_cnt == 0);
+  int ref;
+
+  if (primary)
+  {
+    // a primary slice resets both verdicts before the references are inspected
+    p_Vid->Is_redundant_correct = 1;
+    p_Vid->Is_primary_correct = 1;
+  }
+
+  // intra slices do not depend on any reference
+  if (p_Vid->type == I_SLICE)
+    return;
+
+  if (!primary)
+  {
+    // the redundant slice depends on one reference only
+    if (p_Vid->ref_flag[p_Vid->redundant_slice_ref_idx] == 0)
+      p_Vid->Is_redundant_correct = 0;
+    return;
+  }
+
+  // the primary slice is incorrect as soon as any active list-0 reference is incorrect
+  for (ref = 0; ref < slice->num_ref_idx_l0_active; ++ref)
+  {
+    if (p_Vid->ref_flag[ref] == 0)
+      p_Vid->Is_primary_correct = 0;
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Read and decode slices until read_new_slice() reports EOS.
+ *
+ *    For every slice: run primary/redundant error tracking, drop a
+ *    redundant slice whose primary was received correctly, update the
+ *    ref_flag[] history, select the CBP mapping functions for the active
+ *    chroma format and decode the slice.
+ *
+ *    The loop has no exit other than EOS, so the picture in progress is
+ *    not finished (no exit_picture() call) by this function.
+ * \param p_Vid
+ *    decoder state; p_Vid->currentSlice is reused for every slice
+ * \param time_code
+ *    caller-supplied value, forwarded unchanged to read_new_slice()
+ * \return
+ *    EOS
+ ***********************************************************************
+ */
+
+int decode_one_frame(VideoParameters *p_Vid, uint64_t time_code)
+{
+  InputParameters *p_Inp = p_Vid->p_Inp;
+
+  for (;;)
+  {
+    Slice *currSlice = p_Vid->currentSlice;
+    int current_header;
+    int is_redundant_repeat;
+    int i;
+
+    currSlice->p_Vid = p_Vid;
+    currSlice->p_Inp = p_Inp;
+
+    current_header = read_new_slice(currSlice, time_code);
+    if (current_header == EOS)
+      return EOS;
+
+    // error tracking of primary and redundant slices.
+    Error_tracking(p_Vid);
+
+    // a redundant slice that belongs to the same frame_num as the previous slice
+    is_redundant_repeat = (p_Vid->redundant_pic_cnt != 0 && p_Vid->previous_frame_num == p_Vid->frame_num);
+
+    // If primary and redundant are received and primary is correct, discard the redundant
+    // else, primary slice will be replaced with redundant slice.
+    if (is_redundant_repeat && p_Vid->Is_primary_correct != 0)
+    {
+      continue;
+    }
+
+    // update reference flags and set current p_Vid->ref_flag
+    if (!is_redundant_repeat)
+    {
+      // shift the history by one entry; slot 0 is rewritten below
+      for (i = 16; i > 0; i--)
+      {
+        p_Vid->ref_flag[i] = p_Vid->ref_flag[i-1];
+      }
+    }
+    p_Vid->ref_flag[0] = p_Vid->redundant_pic_cnt==0 ? p_Vid->Is_primary_correct : p_Vid->Is_redundant_correct;
+    p_Vid->previous_frame_num = p_Vid->frame_num;
+
+    // chroma_format_idc 0 and 3 use the "other" CBP mapping, all remaining formats the "normal" one
+    if ((p_Vid->active_sps->chroma_format_idc==0)||(p_Vid->active_sps->chroma_format_idc==3))
+    {
+      currSlice->linfo_cbp_intra = linfo_cbp_intra_other;
+      currSlice->linfo_cbp_inter = linfo_cbp_inter_other;
+    }
+    else
+    {
+      currSlice->linfo_cbp_intra = linfo_cbp_intra_normal;
+      currSlice->linfo_cbp_inter = linfo_cbp_inter_normal;
+    }
+
+    decode_slice(currSlice, current_header);
+
+    ++(p_Vid->current_slice_nr);
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Convert file read buffer to source picture structure.
+ *    Samples are read from buf row by row, symbol_size_in_bytes bytes
+ *    per sample. On big endian hosts 2- and 4-byte samples are byte
+ *    swapped after loading; on little endian hosts the sample bytes are
+ *    copied directly into the (zeroed) imgpel.
+ * \param imgX
+ *    Pointer to image plane
+ * \param buf
+ *    Buffer for file output
+ * \param size_x
+ *    horizontal image size in pixel
+ * \param size_y
+ *    vertical image size in pixel
+ * \param symbol_size_in_bytes
+ *    number of bytes used per pel; must not exceed sizeof(imgpel)
+ ************************************************************************
+ */
+void buffer2img (imgpel** imgX, unsigned char* buf, int size_x, int size_y, int symbol_size_in_bytes)
+{
+  int i,j;
+
+  uint16 tmp16, ui16;
+  // 4-byte samples are loaded with a 4-byte memcpy. "unsigned long" is 8 bytes on
+  // LP64 targets, which would leave half of the variable indeterminate, so use
+  // "unsigned int" (4 bytes on the usual 32/64-bit ABIs) instead.
+  unsigned int tmp32, ui32;
+
+  if (symbol_size_in_bytes> sizeof(imgpel))
+  {
+    error ("Source picture has higher bit depth than imgpel data type. \nPlease recompile with larger data type for imgpel.", 500);
+  }
+
+  if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes))
+  {
+    // imgpel == pixel_in_file == 1 byte -> simple copy
+    // NOTE(review): a single memcpy assumes the rows of imgX are stored contiguously - confirm against the allocator
+    memcpy(&imgX[0][0], buf, size_x * size_y);
+  }
+  else
+  {
+    // sizeof (imgpel) > sizeof(char)
+    if (testEndian())
+    {
+      // big endian
+      switch (symbol_size_in_bytes)
+      {
+      case 1:
+        {
+          for(j = 0; j < size_y; ++j)
+            for(i = 0; i < size_x; ++i)
+            {
+              imgX[j][i]= buf[i+j*size_x];
+            }
+          break;
+        }
+      case 2:
+        {
+          for(j=0;j<size_y;++j)
+            for(i=0;i<size_x;++i)
+            {
+              memcpy(&tmp16, buf+((i+j*size_x)*2), 2);
+              ui16 = (uint16) ((tmp16 >> 8) | ((tmp16&0xFF)<<8));
+              imgX[j][i] = (imgpel) ui16;
+            }
+          break;
+        }
+      case 4:
+        {
+          for(j=0;j<size_y;++j)
+            for(i=0;i<size_x;++i)
+            {
+              memcpy(&tmp32, buf+((i+j*size_x)*4), 4);
+              ui32 = ((tmp32&0xFF00)<<8) | ((tmp32&0xFF)<<24) | ((tmp32&0xFF0000)>>8) | ((tmp32&0xFF000000)>>24);
+              imgX[j][i] = (imgpel) ui32;
+            }
+          // without this break a successfully converted 32-bit picture fell
+          // through into the "unsupported format" error below
+          break;
+        }
+      default:
+        {
+          error ("reading only from formats of 8, 16 or 32 bit allowed on big endian architecture", 500);
+          break;
+        }
+      }
+
+    }
+    else
+    {
+      // little endian
+      if (symbol_size_in_bytes == 1)
+      {
+        for (j=0; j < size_y; ++j)
+        {
+          for (i=0; i < size_x; ++i)
+          {
+            imgX[j][i]=*(buf++);
+          }
+        }
+      }
+      else
+      {
+        for (j=0; j < size_y; ++j)
+        {
+          int jpos = j*size_x;
+          for (i=0; i < size_x; ++i)
+          {
+            // clear first: the sample may occupy fewer bytes than imgpel
+            imgX[j][i]=0;
+            memcpy(&(imgX[j][i]), buf +((i+jpos)*symbol_size_in_bytes), symbol_size_in_bytes);
+          }
+        }
+      }
+
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Apply the reference picture list reordering commands of the slice
+ *    header to list 0 (all slices except I and SI) and list 1 (B slices),
+ *    verify that the last active entry of each list is a real reference,
+ *    clamp the list sizes to the active reference counts and release the
+ *    reordering command buffers.
+ * \param currSlice
+ *    slice whose reordering commands are applied
+ ************************************************************************
+ */
+void reorder_lists(Slice *currSlice)
+{
+  VideoParameters *vid = currSlice->p_Vid;
+
+  if (!(currSlice->slice_type == I_SLICE || currSlice->slice_type == SI_SLICE))
+  {
+    if (currSlice->ref_pic_list_reordering_flag_l0)
+    {
+      reorder_ref_pic_list(vid, vid->listX[0], &vid->listXsize[0],
+                           currSlice->num_ref_idx_l0_active - 1,
+                           currSlice->reordering_of_pic_nums_idc_l0,
+                           currSlice->abs_diff_pic_num_minus1_l0,
+                           currSlice->long_term_pic_idx_l0);
+    }
+
+    // the last active entry must not be the "no reference picture" placeholder
+    if (vid->listX[0][currSlice->num_ref_idx_l0_active-1] == vid->no_reference_picture)
+    {
+      if (!vid->non_conforming_stream)
+        error("RefPicList0[ num_ref_idx_l0_active_minus1 ] is equal to 'no reference picture', invalid bitstream",500);
+      else
+        printf("RefPicList0[ num_ref_idx_l0_active_minus1 ] is equal to 'no reference picture'\n");
+    }
+
+    // by definition the list holds exactly the active entries
+    vid->listXsize[0] = (char) currSlice->num_ref_idx_l0_active;
+  }
+
+  if (currSlice->slice_type == B_SLICE)
+  {
+    if (currSlice->ref_pic_list_reordering_flag_l1)
+    {
+      reorder_ref_pic_list(vid, vid->listX[1], &vid->listXsize[1],
+                           currSlice->num_ref_idx_l1_active - 1,
+                           currSlice->reordering_of_pic_nums_idc_l1,
+                           currSlice->abs_diff_pic_num_minus1_l1,
+                           currSlice->long_term_pic_idx_l1);
+    }
+
+    if (vid->listX[1][currSlice->num_ref_idx_l1_active-1] == vid->no_reference_picture)
+    {
+      if (!vid->non_conforming_stream)
+        error("RefPicList1[ num_ref_idx_l1_active_minus1 ] is equal to 'no reference picture', invalid bitstream",500);
+      else
+        printf("RefPicList1[ num_ref_idx_l1_active_minus1 ] is equal to 'no reference picture'\n");
+    }
+
+    // by definition the list holds exactly the active entries
+    vid->listXsize[1] = (char) currSlice->num_ref_idx_l1_active;
+  }
+
+  free_ref_pic_list_reordering_buffer(currSlice);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fill the ref_pic_num tables of the current picture for the current
+ *    slice. For every entry of a reference list the table stores
+ *    poc*2 (+1 for a bottom field), frame_poc*2, top_poc*2 and
+ *    bottom_poc*2+1. Lists LIST_0 and LIST_1 are always processed; lists
+ *    2..5 only for a frame picture of a stream that is not
+ *    frame_mbs_only.
+ * \param p_Vid
+ *    decoder state providing listX[], listXsize[] and dec_picture
+ ************************************************************************
+ */
+void set_ref_pic_num(VideoParameters *p_Vid)
+{
+  StorablePicture *pic = p_Vid->dec_picture;
+  int slice = p_Vid->current_slice_nr;
+  int lists[6];
+  int list_count = 0;
+  int n, idx;
+
+  // collect the lists to process, in the order they are written
+  lists[list_count++] = LIST_0;
+  lists[list_count++] = LIST_1;
+
+  if (!p_Vid->active_sps->frame_mbs_only_flag && p_Vid->structure == FRAME)
+  {
+    for (n = 2; n < 6; ++n)
+      lists[list_count++] = n;
+  }
+
+  for (n = 0; n < list_count; ++n)
+  {
+    int list = lists[n];
+
+    for (idx = 0; idx < p_Vid->listXsize[list]; ++idx)
+    {
+      pic->ref_pic_num        [slice][list][idx] = p_Vid->listX[list][idx]->poc * 2 + ((p_Vid->listX[list][idx]->structure == BOTTOM_FIELD) ? 1 : 0);
+      pic->frm_ref_pic_num    [slice][list][idx] = p_Vid->listX[list][idx]->frame_poc * 2;
+      pic->top_ref_pic_num    [slice][list][idx] = p_Vid->listX[list][idx]->top_poc * 2;
+      pic->bottom_ref_pic_num [slice][list][idx] = p_Vid->listX[list][idx]->bottom_poc * 2 + 1;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * Reads new slice from bit_stream
+ *
+ * NAL units are fetched with read_next_nalu() until one that starts a
+ * slice (SLICE, IDR or data partition A) has been parsed. SEI, PPS and
+ * SPS units met on the way are processed; the remaining types are
+ * ignored or reported and the loop continues.
+ * \param currSlice
+ * slice structure to fill; its p_Vid and p_Inp members must be set
+ * \param time_code
+ * caller-supplied value, forwarded unchanged to init_picture()
+ * \return
+ * SOP when is_new_picture() reports a new picture, SOS otherwise,
+ * EOS when p_Vid->nalu is NULL or read_next_nalu() returns 0 before a
+ * slice could be read
+ ************************************************************************
+ */
+
+// benski> time_code is user-passed data
+static int read_new_slice(Slice *currSlice, uint64_t time_code)
+{
+ VideoParameters *p_Vid = currSlice->p_Vid;
+ InputParameters *p_Inp = currSlice->p_Inp;
+
+ NALU_t *nalu = p_Vid->nalu;
+
+ int current_header = 0;
+ Bitstream *currStream;
+
+ int slice_id_a, slice_id_b, slice_id_c;
+ int redundant_pic_cnt_b, redundant_pic_cnt_c; // parsed to advance the streams; the values are not used afterwards in this function
+
+ if (!nalu)
+ return EOS;
+
+ for (;;)
+ {
+ if (0 == read_next_nalu(p_Vid, nalu)) // 0 is treated as "nothing more to read"
+ return EOS;
+
+process_nalu: // re-entry point for the DP_A branch, which may already have read the following NAL unit
+
+ switch (nalu->nal_unit_type)
+ {
+ case NALU_TYPE_SLICE:
+ case NALU_TYPE_IDR:
+
+ if (p_Vid->recovery_point || nalu->nal_unit_type == NALU_TYPE_IDR)
+ {
+ if (p_Vid->recovery_point_found == 0)
+ {
+ if (nalu->nal_unit_type != NALU_TYPE_IDR)
+ {
+ printf("Warning: Decoding does not start with an IDR picture.\n");
+ p_Vid->non_conforming_stream = 1;
+ }
+ else
+ p_Vid->non_conforming_stream = 0;
+ }
+ p_Vid->recovery_point_found = 1;
+ }
+
+ if (p_Vid->recovery_point_found == 0) // slices are skipped until an IDR or a flagged recovery point has been seen
+ break;
+
+ p_Vid->idr_flag = (nalu->nal_unit_type == NALU_TYPE_IDR);
+ p_Vid->nal_reference_idc = nalu->nal_reference_idc;
+
+ currSlice->dp_mode = PAR_DP_1;
+ currSlice->max_part_nr = 1;
+ currStream = currSlice->partArr[0].bitstream;
+ currStream->frame_bitoffset = currStream->read_len = 0;
+ //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1);
+ currStream->streamBuffer = &nalu->buf[1]; // the stream points into the NAL unit buffer (no copy), skipping its first byte
+ currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1);
+
+ // Some syntax of the Slice Header depends on the parameter set, which depends on
+ // the parameter set ID of the Slice header. Hence, read the pic_parameter_set_id
+ // of the slice header first, then setup the active parameter sets, and then read
+ // the rest of the slice header
+ FirstPartOfSliceHeader(currSlice);
+ UseParameterSet (currSlice, currSlice->pic_parameter_set_id);
+ RestOfSliceHeader (currSlice);
+
+ fmo_init (p_Vid);
+ currSlice->active_sps = p_Vid->active_sps;
+ currSlice->active_pps = p_Vid->active_pps;
+
+ assign_quant_params (currSlice);
+
+ // if primary slice is replaced with redundant slice, set the correct image type
+ if(p_Vid->redundant_pic_cnt && p_Vid->Is_primary_correct==0 && p_Vid->Is_redundant_correct)
+ {
+ p_Vid->dec_picture->slice_type = p_Vid->type;
+ }
+
+ if(is_new_picture(p_Vid->dec_picture, currSlice, p_Vid->old_slice))
+ {
+ init_picture(p_Vid, currSlice, p_Inp, time_code);
+
+ current_header = SOP;
+ //check zero_byte if it is also the first NAL unit in the access unit
+ CheckZeroByteVCL(p_Vid, nalu);
+ }
+ else
+ current_header = SOS;
+
+ if (currSlice->slice_type == B_SLICE) // B slices get freshly allocated co-located storage; any previous one is released first
+ {
+ if( IS_INDEPENDENT(p_Vid) )
+ {
+ int nplane = 0;
+ for( nplane=0; nplane<MAX_PLANE; nplane++ )
+ {
+ if( NULL != currSlice->Co_located_JV[nplane] )
+ {
+ free_colocated(p_Vid, currSlice->Co_located_JV[nplane]);
+ currSlice->Co_located_JV[nplane] = NULL;
+ }
+ currSlice->Co_located_JV[nplane] = alloc_colocated (p_Vid, p_Vid->width, p_Vid->height, p_Vid->active_sps->mb_adaptive_frame_field_flag);
+ }
+ }
+ else
+ {
+ if (NULL != currSlice->p_colocated)
+ {
+ free_colocated(p_Vid, currSlice->p_colocated);
+ currSlice->p_colocated = NULL;
+ }
+ currSlice->p_colocated = alloc_colocated (p_Vid, p_Vid->width, p_Vid->height,p_Vid->active_sps->mb_adaptive_frame_field_flag);
+ }
+ }
+
+
+ init_lists(currSlice);
+ reorder_lists (currSlice);
+
+ if (p_Vid->structure==FRAME)
+ {
+ init_mbaff_lists(p_Vid);
+ }
+
+ // From here on, p_Vid->active_sps, p_Vid->active_pps and the slice header are valid
+ if (currSlice->mb_aff_frame_flag)
+ p_Vid->current_mb_nr = currSlice->start_mb_nr << 1; // start_mb_nr is doubled when mb_aff_frame_flag is set
+ else
+ p_Vid->current_mb_nr = currSlice->start_mb_nr;
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag)
+ {
+ int ByteStartPosition = currStream->frame_bitoffset/8;
+ if (currStream->frame_bitoffset%8 != 0) // round up to the next whole byte
+ {
+ ++ByteStartPosition;
+ }
+ arideco_start_decoding (&currSlice->partArr[0].de_cabac, currStream->streamBuffer, ByteStartPosition, &currStream->read_len);
+ }
+ // printf ("read_new_slice: returning %s\n", current_header == SOP?"SOP":"SOS");
+ p_Vid->recovery_point = 0;
+ return current_header;
+ break; // not reached, follows the return above
+ case NALU_TYPE_DPA:
+ // read DP_A
+ currSlice->dpB_NotPresent =1;
+ currSlice->dpC_NotPresent =1;
+
+ p_Vid->idr_flag = FALSE;
+ p_Vid->nal_reference_idc = nalu->nal_reference_idc;
+ currSlice->dp_mode = PAR_DP_3;
+ currSlice->max_part_nr = 3;
+ currStream = currSlice->partArr[0].bitstream;
+ currStream->frame_bitoffset = currStream->read_len = 0;
+ //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1);
+ currStream->streamBuffer = &nalu->buf[1]; // NOTE(review): the same nalu is refilled below for DP_B/DP_C; confirm read_next_nalu() does not overwrite the bytes this partition still points to
+ currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1);
+
+ FirstPartOfSliceHeader(currSlice);
+ UseParameterSet (currSlice, currSlice->pic_parameter_set_id);
+ RestOfSliceHeader (currSlice);
+
+ fmo_init (p_Vid);
+
+ if(is_new_picture(p_Vid->dec_picture, currSlice, p_Vid->old_slice))
+ {
+ init_picture(p_Vid, currSlice, p_Inp, time_code);
+ current_header = SOP;
+ CheckZeroByteVCL(p_Vid, nalu);
+ }
+ else
+ current_header = SOS;
+
+ init_lists(currSlice);
+ reorder_lists (currSlice);
+
+ if (p_Vid->structure==FRAME)
+ {
+ init_mbaff_lists(p_Vid);
+ }
+
+ // From here on, p_Vid->active_sps, p_Vid->active_pps and the slice header are valid
+ if (currSlice->mb_aff_frame_flag)
+ p_Vid->current_mb_nr = currSlice->start_mb_nr << 1;
+ else
+ p_Vid->current_mb_nr = currSlice->start_mb_nr;
+
+ // Now I need to read the slice ID, which depends on the value of
+ // redundant_pic_cnt_present_flag
+
+ slice_id_a = ue_v("NALU: DP_A slice_id", currStream);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag)
+ error ("received data partition with CABAC, this is not allowed", 500);
+
+ // continue with reading next DP
+ if (0 == read_next_nalu(p_Vid, nalu)) // nothing follows DP_A: return it with B and C marked as not present
+ return current_header;
+
+ if ( NALU_TYPE_DPB == nalu->nal_unit_type)
+ {
+ // we got a DPB
+ currStream = currSlice->partArr[1].bitstream;
+ currStream->frame_bitoffset = currStream->read_len = 0;
+
+ //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1);
+ currStream->streamBuffer = &nalu->buf[1];
+ currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1);
+
+ slice_id_b = ue_v("NALU: DP_B slice_id", currStream);
+
+ currSlice->dpB_NotPresent = 0;
+
+ if ((slice_id_b != slice_id_a) || (nalu->lost_packets))
+ {
+ printf ("Waning: got a data partition B which does not match DP_A (DP loss!)\n");
+ currSlice->dpB_NotPresent =1;
+ currSlice->dpC_NotPresent =1;
+ }
+ else
+ {
+ if (p_Vid->active_pps->redundant_pic_cnt_present_flag)
+ redundant_pic_cnt_b = ue_v("NALU: DP_B redudant_pic_cnt", currStream);
+ else
+ redundant_pic_cnt_b = 0;
+
+ // we're finished with DP_B, so let's continue with next DP
+ if (0 == read_next_nalu(p_Vid, nalu))
+ return current_header;
+ }
+ }
+ else
+ {
+ currSlice->dpB_NotPresent =1;
+ }
+
+ // check if we got DP_C
+ if ( NALU_TYPE_DPC == nalu->nal_unit_type)
+ {
+ currStream = currSlice->partArr[2].bitstream;
+ currStream->frame_bitoffset = currStream->read_len = 0;
+
+ //memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1);
+ currStream->streamBuffer = &nalu->buf[1];
+ currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1);
+
+ currSlice->dpC_NotPresent = 0;
+
+ slice_id_c = ue_v("NALU: DP_C slice_id", currStream);
+ if ((slice_id_c != slice_id_a)|| (nalu->lost_packets))
+ {
+ printf ("Warning: got a data partition C which does not match DP_A(DP loss!)\n");
+ //currSlice->dpB_NotPresent =1;
+ currSlice->dpC_NotPresent =1;
+ }
+
+ if (p_Vid->active_pps->redundant_pic_cnt_present_flag)
+ redundant_pic_cnt_c = ue_v("NALU:SLICE_C redudand_pic_cnt", currStream);
+ else
+ redundant_pic_cnt_c = 0;
+ }
+ else
+ {
+ currSlice->dpC_NotPresent =1;
+ }
+
+ // check if we read anything else than the expected partitions
+ if ((nalu->nal_unit_type != NALU_TYPE_DPB) && (nalu->nal_unit_type != NALU_TYPE_DPC))
+ {
+ // we have a NALU that we can't process here, so restart processing
+ goto process_nalu;
+ // yes, "goto" should not be used, but it's really the best way here before we restructure the decoding loop
+ // (which should be taken care of anyway)
+ }
+
+ return current_header;
+
+ break; // not reached, follows the return above
+ case NALU_TYPE_DPB:
+ printf ("found data partition B without matching DP A, discarding\n");
+ break;
+ case NALU_TYPE_DPC:
+ printf ("found data partition C without matching DP A, discarding\n");
+ break;
+ case NALU_TYPE_SEI:
+ //printf ("read_new_slice: Found NALU_TYPE_SEI, len %d\n", nalu->len);
+ InterpretSEIMessage(nalu->buf,nalu->len,p_Vid);
+ break;
+ case NALU_TYPE_PPS:
+ ProcessPPS(p_Vid, nalu);
+ break;
+ case NALU_TYPE_SPS:
+ ProcessSPS(p_Vid, nalu);
+ break;
+ case NALU_TYPE_AUD:
+// printf ("read_new_slice: Found 'Access Unit Delimiter' NAL unit, len %d, ignored\n", nalu->len);
+ break;
+ case NALU_TYPE_EOSEQ:
+// printf ("read_new_slice: Found 'End of Sequence' NAL unit, len %d, ignored\n", nalu->len);
+ break;
+ case NALU_TYPE_EOSTREAM:
+// printf ("read_new_slice: Found 'End of Stream' NAL unit, len %d, ignored\n", nalu->len);
+ break;
+ case NALU_TYPE_FILL:
+ printf ("read_new_slice: Found NALU_TYPE_FILL, len %d\n", (int) nalu->len);
+ printf ("Skipping these filling bits, proceeding w/ next NALU\n");
+ break;
+ default:
+ printf ("Found NALU type %d, len %d undefined, ignore NALU, moving on\n", (int) nalu->nal_unit_type, (int) nalu->len);
+ break;
+ }
+ }
+
+ return current_header; // not reached: the for(;;) loop above is only left through a return
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * finish decoding of a picture, conceal errors and store it
+ * into the DPB
+ *
+ * Steps, in order: build error concealment segments from the ei_flag
+ * of consecutive macroblocks and run intra/inter concealment (only
+ * when the picture is not MBAFF), deblock, run the MBAFF and
+ * frame/field post-processing, hand the picture to
+ * store_picture_in_dpb() and reset the per-picture counters.
+ * \param p_Vid
+ * decoder state
+ * \param dec_picture
+ * address of the picture pointer to finish; nothing is done when it
+ * holds NULL, and it is set to NULL once the picture has been stored
+ ************************************************************************
+ */
+void exit_picture(VideoParameters *p_Vid, StorablePicture **dec_picture)
+{
+ InputParameters *p_Inp = p_Vid->p_Inp; // not referenced again in this function
+ int ercStartMB;
+ int ercSegment;
+ frame recfr;
+ unsigned int i;
+ int structure, frame_poc, slice_type, refpic, qp, pic_num, chroma_format_idc, is_idr;
+
+ int nplane;
+
+ // return if the last picture has already been finished
+ if (*dec_picture==NULL)
+ {
+ return;
+ }
+
+ recfr.p_Vid = p_Vid;
+ recfr.yptr = (*dec_picture)->imgY->base_address;
+ if ((*dec_picture)->chroma_format_idc != YUV400) // for YUV400 recfr.uptr and recfr.vptr are left unset
+ {
+ recfr.uptr = (*dec_picture)->imgUV[0]->base_address;
+ recfr.vptr = (*dec_picture)->imgUV[1]->base_address;
+ }
+
+ //! this is always true at the beginning of a picture
+ ercStartMB = 0;
+ ercSegment = 0;
+
+ //! mark the start of the first segment
+ if (!(*dec_picture)->mb_aff_frame_flag)
+ {
+ ercStartSegment(0, ercSegment, 0 , p_Vid->erc_errorVar);
+ //! generate the segments according to the macroblock map
+ for(i = 1; i<(*dec_picture)->PicSizeInMbs; ++i)
+ {
+ if(p_Vid->mb_data[i].ei_flag != p_Vid->mb_data[i-1].ei_flag) // a change of ei_flag between neighbours closes the current segment
+ {
+ ercStopSegment(i-1, ercSegment, 0, p_Vid->erc_errorVar); //! stop current segment
+
+ //! mark current segment as lost or OK
+ if(p_Vid->mb_data[i-1].ei_flag)
+ ercMarkCurrSegmentLost((*dec_picture)->size_x, p_Vid->erc_errorVar);
+ else
+ ercMarkCurrSegmentOK((*dec_picture)->size_x, p_Vid->erc_errorVar);
+
+ ++ercSegment; //! next segment
+ ercStartSegment(i, ercSegment, 0 , p_Vid->erc_errorVar); //! start new segment
+ ercStartMB = i;//! save start MB for this segment
+ }
+ }
+ //! mark end of the last segment
+ ercStopSegment((*dec_picture)->PicSizeInMbs-1, ercSegment, 0, p_Vid->erc_errorVar);
+ if(p_Vid->mb_data[i-1].ei_flag) // i-1 is the last macroblock visited by the loop above
+ ercMarkCurrSegmentLost((*dec_picture)->size_x, p_Vid->erc_errorVar);
+ else
+ ercMarkCurrSegmentOK((*dec_picture)->size_x, p_Vid->erc_errorVar);
+
+ //! call the right error concealment function depending on the frame type.
+ p_Vid->erc_mvperMB /= (*dec_picture)->PicSizeInMbs; // turn the accumulated sum into a per-macroblock average
+
+ p_Vid->erc_img = p_Vid;
+ if((*dec_picture)->slice_type == I_SLICE || (*dec_picture)->slice_type == SI_SLICE) // I-frame
+ ercConcealIntraFrame(p_Vid, &recfr, (*dec_picture)->size_x, (*dec_picture)->size_y, p_Vid->erc_errorVar);
+ else
+ ercConcealInterFrame(&recfr, p_Vid->erc_object_list, (*dec_picture)->size_x, (*dec_picture)->size_y, p_Vid->erc_errorVar, (*dec_picture)->chroma_format_idc);
+ }
+
+ //deblocking for frame or field
+ if( IS_INDEPENDENT(p_Vid) )
+ {
+ int colour_plane_id = p_Vid->colour_plane_id; // saved here and restored after the per-plane loop
+ for( nplane=0; nplane<MAX_PLANE; ++nplane )
+ {
+ change_plane_JV( p_Vid, nplane );
+ DeblockPicture( p_Vid, *dec_picture );
+ }
+ p_Vid->colour_plane_id = colour_plane_id;
+ make_frame_picture_JV(p_Vid);
+ }
+ else
+ {
+ DeblockPicture( p_Vid, *dec_picture );
+ }
+
+ if ((*dec_picture)->mb_aff_frame_flag)
+ MbAffPostProc(p_Vid);
+
+ if (p_Vid->structure == FRAME) // buffer mgt. for frame mode
+ frame_postprocessing(p_Vid);
+ else
+ field_postprocessing(p_Vid); // reset all interlaced variables
+
+ structure = (*dec_picture)->structure; // copied before the picture pointer is cleared below
+ slice_type = (*dec_picture)->slice_type;
+ frame_poc = (*dec_picture)->frame_poc; // frame_poc, qp, pic_num, is_idr and chroma_format_idc are not read again in this function
+ refpic = (*dec_picture)->used_for_reference;
+ qp = (*dec_picture)->qp;
+ pic_num = (*dec_picture)->pic_num;
+ is_idr = (*dec_picture)->idr_flag;
+
+ chroma_format_idc = (*dec_picture)->chroma_format_idc;
+
+ store_picture_in_dpb(p_Vid, *dec_picture);
+ *dec_picture=NULL;
+
+ if (p_Vid->last_has_mmco_5)
+ {
+ p_Vid->pre_frame_num = 0;
+ }
+
+
+ if ((structure==FRAME)||structure==BOTTOM_FIELD) // counted once per frame or per bottom field
+ {
+ if(slice_type == I_SLICE || slice_type == SI_SLICE || slice_type == P_SLICE || refpic) // I or P pictures
+ ++(p_Vid->number);
+
+ }
+
+ p_Vid->current_mb_nr = -4712; // impossible value for debugging, StW
+ p_Vid->current_slice_nr = 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * write the encoding mode and motion vectors of current
+ * MB to the buffer of the error concealment module.
+ *
+ * Four consecutive entries of p_Vid->erc_object_list are written per
+ * macroblock, one per value of i in 0..3. For each entry regionMode is
+ * derived from mb_type and b8mode[i], mv[0] and mv[1] receive a motion
+ * vector and mv[2] the reference index (all three are 0 for
+ * intra/copy regions). The absolute vector components are added to
+ * p_Vid->erc_mvperMB.
+ * \param currMB
+ * macroblock that has just been decoded; its p_Vid supplies the
+ * current macroblock number and picture
+ ************************************************************************
+ */
+
+void ercWriteMBMODEandMV(Macroblock *currMB)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int i, ii, jj, currMBNum = p_Vid->current_mb_nr;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ int mbx = 4*xPosMB(currMBNum, dec_picture->size_x), mby = 4*yPosMB(currMBNum, dec_picture->size_x); // presumably the MB position in 4x4-block units - TODO confirm against xPosMB/yPosMB
+ objectBuffer_t *currRegion, *pRegion;
+
+ currRegion = p_Vid->erc_object_list + (currMBNum<<2); // four region entries per macroblock
+
+ if(p_Vid->type != B_SLICE) //non-B frame
+ {
+ for (i=0; i<4; ++i)
+ {
+ pRegion = currRegion + i;
+ pRegion->regionMode = (currMB->mb_type ==I16MB ? REGMODE_INTRA :
+ currMB->b8mode[i]==IBLOCK ? REGMODE_INTRA_8x8 :
+ currMB->b8mode[i]==0 ? REGMODE_INTER_COPY :
+ currMB->b8mode[i]==1 ? REGMODE_INTER_PRED : REGMODE_INTER_PRED_8x8);
+ if (currMB->b8mode[i]==0 || currMB->b8mode[i]==IBLOCK) // INTRA OR COPY
+ {
+ pRegion->mv[0] = 0;
+ pRegion->mv[1] = 0;
+ pRegion->mv[2] = 0;
+ }
+ else
+ {
+ PicMotion *motion00;
+ PicMotion *motion01;
+ ii = mbx + (i & 0x01)*2;// + BLOCK_SIZE;
+ jj = mby + (i & ~1 ); // row offset 0 for i = 0,1 and 2 for i = 2,3
+ motion00 = &dec_picture->motion.motion[LIST_0][jj][ii];
+ if (currMB->b8mode[i]>=5 && currMB->b8mode[i]<=7) // SMALL BLOCKS
+ {
+ motion01 = &dec_picture->motion.motion[LIST_0][jj+1][ii];
+ pRegion->mv[0] = (motion00[0].mv[0] + motion00[1].mv[0] + motion01[0].mv[0] + motion01[1].mv[0] + 2)/4; // average of the 2x2 motion entries
+ pRegion->mv[1] = (motion00[0].mv[1] + motion00[1].mv[1] + motion01[0].mv[1] + motion01[1].mv[1] + 2)/4;
+ }
+ else // 16x16, 16x8, 8x16, 8x8
+ {
+ pRegion->mv[0] = motion00->mv[0];
+ pRegion->mv[1] = motion00->mv[1];
+ }
+ p_Vid->erc_mvperMB += abs(pRegion->mv[0]) + abs(pRegion->mv[1]);
+ pRegion->mv[2] = motion00->ref_idx;
+ }
+ }
+ }
+ else //B-frame
+ {
+ for (i=0; i<4; ++i)
+ {
+ ii = mbx + (i%2)*2;// + BLOCK_SIZE;
+ jj = mby + (i/2)*2;
+ pRegion = currRegion + i;
+ pRegion->regionMode = (currMB->mb_type ==I16MB ? REGMODE_INTRA :
+ currMB->b8mode[i]==IBLOCK ? REGMODE_INTRA_8x8 : REGMODE_INTER_PRED_8x8);
+ if (currMB->mb_type==I16MB || currMB->b8mode[i]==IBLOCK) // INTRA
+ {
+ pRegion->mv[0] = 0;
+ pRegion->mv[1] = 0;
+ pRegion->mv[2] = 0;
+ }
+ else
+ {
+ PicMotion *motion0= & dec_picture->motion.motion[LIST_0][jj][ii];
+ PicMotion *motion1;
+
+ if (motion0->ref_idx<0) // no list-0 reference at this position: take the list-1 motion instead
+ {
+ motion0 = & dec_picture->motion.motion[LIST_1][jj][ii];
+ motion1 = & dec_picture->motion.motion[LIST_1][jj+1][ii];
+ }
+ else
+ {
+ motion1 = & dec_picture->motion.motion[LIST_0][jj+1][ii];
+ }
+// int idx = (currMB->b8mode[i]==0 && currMB->b8pdir[i]==2 ? LIST_0 : currMB->b8pdir[i]==1 ? LIST_1 : LIST_0);
+// int idx = currMB->b8pdir[i]==0 ? LIST_0 : LIST_1;
+
+ pRegion->mv[0] = (motion0[0].mv[0] + motion0[1].mv[0] + motion1[0].mv[0] + motion1[1].mv[0] + 2)/4; // B pictures always use the 2x2 average
+ pRegion->mv[1] = (motion0[0].mv[1] + motion0[1].mv[1] + motion1[0].mv[1] + motion1[1].mv[1] + 2)/4;
+ p_Vid->erc_mvperMB += iabs(pRegion->mv[0]) + iabs(pRegion->mv[1]);
+
+ pRegion->mv[2] = (motion0[0].ref_idx);
+/*
+ if (currMB->b8pdir[i]==0 || (currMB->b8pdir[i]==2 && currMB->b8mode[i]!=0)) // forward or bidirect
+ {
+ pRegion->mv[2] = (dec_picture->motion.ref_idx[LIST_0][jj][ii]);
+ ///???? is it right, not only "p_Vid->fw_refFrArr[jj][ii-4]"
+ }
+ else
+ {
+ pRegion->mv[2] = (dec_picture->motion.ref_idx[LIST_1][jj][ii]);
+// pRegion->mv[2] = 0;
+ }
+ */
+ }
+ }
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reset the stored parameters of the previous slice to defaults.
+ *    The identifying fields are set to the extreme values INT_MAX /
+ *    UINT_MAX, the two flags to 0 / FALSE.
+ * \param p_old_slice
+ *    parameter record to reset
+ ************************************************************************
+ */
+void init_old_slice(OldSliceParams *p_old_slice)
+{
+  // flags
+  p_old_slice->idr_flag       = FALSE;
+  p_old_slice->field_pic_flag = 0;
+
+  // identifiers taken from the NAL unit and slice header
+  p_old_slice->nal_ref_idc = INT_MAX;
+  p_old_slice->frame_num   = INT_MAX;
+  p_old_slice->pps_id      = INT_MAX;
+
+  // picture order count fields
+  p_old_slice->delta_pic_order_cnt[1]    = INT_MAX;
+  p_old_slice->delta_pic_order_cnt[0]    = INT_MAX;
+  p_old_slice->delta_pic_oder_cnt_bottom = INT_MAX;
+  p_old_slice->pic_oder_cnt_lsb          = UINT_MAX;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release the co-located storage of a B slice and remember the slice
+ *    parameters that is_new_picture() compares against when checking
+ *    for the "first VCL NAL unit of a picture".
+ * \param currSlice
+ *    slice that has been decoded
+ * \param p_old_slice
+ *    record receiving the parameters of currSlice
+ ************************************************************************
+ */
+static void exit_slice(Slice *currSlice, OldSliceParams *p_old_slice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+
+  if (currSlice->slice_type == B_SLICE)
+  {
+    if( !IS_INDEPENDENT(p_Vid) )
+    {
+      free_colocated(p_Vid, currSlice->p_colocated);
+      currSlice->p_colocated = NULL;
+    }
+    else
+    {
+      // one co-located structure per colour plane
+      int plane = 0;
+      while (plane < MAX_PLANE)
+      {
+        free_colocated(p_Vid, currSlice->Co_located_JV[plane]);
+        currSlice->Co_located_JV[plane] = NULL;
+        ++plane;
+      }
+    }
+  }
+
+  // fields that are always recorded
+  p_old_slice->pps_id         = currSlice->pic_parameter_set_id;
+  p_old_slice->frame_num      = p_Vid->frame_num;
+  p_old_slice->field_pic_flag = p_Vid->field_pic_flag;
+  p_old_slice->nal_ref_idc    = p_Vid->nal_reference_idc;
+  p_old_slice->idr_flag       = (byte) p_Vid->idr_flag;
+
+  // fields that are only recorded when the current slice carries them
+  if (p_Vid->field_pic_flag)
+    p_old_slice->bottom_field_flag = p_Vid->bottom_field_flag;
+
+  if (p_Vid->idr_flag)
+    p_old_slice->idr_pic_id = p_Vid->idr_pic_id;
+
+  if (p_Vid->active_sps->pic_order_cnt_type == 0)
+  {
+    p_old_slice->pic_oder_cnt_lsb          = p_Vid->pic_order_cnt_lsb;
+    p_old_slice->delta_pic_oder_cnt_bottom = p_Vid->delta_pic_order_cnt_bottom;
+  }
+
+  if (p_Vid->active_sps->pic_order_cnt_type == 1)
+  {
+    p_old_slice->delta_pic_order_cnt[0] = p_Vid->delta_pic_order_cnt[0];
+    p_old_slice->delta_pic_order_cnt[1] = p_Vid->delta_pic_order_cnt[1];
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Detect if current slice is "first VCL NAL unit of a picture" by
+ *    comparing its header values with those stored for the previous
+ *    slice.
+ * \param dec_picture
+ *    picture currently being decoded; NULL always counts as new
+ * \param currSlice
+ *    slice whose header has just been read
+ * \param p_old_slice
+ *    parameters stored for the previous slice
+ * \return
+ *    1 when any compared value differs (or dec_picture is NULL),
+ *    0 otherwise
+ ************************************************************************
+ */
+int is_new_picture(StorablePicture *dec_picture, Slice *currSlice, OldSliceParams *p_old_slice)
+{
+  VideoParameters *vid = currSlice->p_Vid;
+  int differs = 0;
+
+  if (dec_picture == NULL)
+    differs = 1;
+
+  if (currSlice->pic_parameter_set_id != p_old_slice->pps_id)
+    differs = 1;
+
+  if (vid->frame_num != p_old_slice->frame_num)
+    differs = 1;
+
+  if (vid->field_pic_flag != p_old_slice->field_pic_flag)
+    differs = 1;
+
+  // the field parity only matters when both slices are field slices
+  if (vid->field_pic_flag && p_old_slice->field_pic_flag)
+  {
+    if (vid->bottom_field_flag != p_old_slice->bottom_field_flag)
+      differs = 1;
+  }
+
+  // nal_ref_idc differs with one of the two values being zero
+  if ((p_old_slice->nal_ref_idc != vid->nal_reference_idc) &&
+      ((p_old_slice->nal_ref_idc == 0) || (vid->nal_reference_idc == 0)))
+    differs = 1;
+
+  if (p_old_slice->idr_flag != vid->idr_flag)
+    differs = 1;
+
+  if (vid->idr_flag && p_old_slice->idr_flag)
+  {
+    if (vid->idr_pic_id != p_old_slice->idr_pic_id)
+      differs = 1;
+  }
+
+  if (vid->active_sps->pic_order_cnt_type == 0)
+  {
+    if (vid->pic_order_cnt_lsb != p_old_slice->pic_oder_cnt_lsb)
+      differs = 1;
+    if (vid->delta_pic_order_cnt_bottom != p_old_slice->delta_pic_oder_cnt_bottom)
+      differs = 1;
+  }
+
+  if (vid->active_sps->pic_order_cnt_type == 1)
+  {
+    if (vid->delta_pic_order_cnt[0] != p_old_slice->delta_pic_order_cnt[0])
+      differs = 1;
+    if (vid->delta_pic_order_cnt[1] != p_old_slice->delta_pic_order_cnt[1])
+      differs = 1;
+  }
+
+  return differs;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Post-processing step after frame decoding.
+ *    The body is empty: no work is performed here.
+ *
+ * \param p_Vid
+ *    decoder state (unused)
+ ************************************************************************
+ */
+void frame_postprocessing(VideoParameters *p_Vid)
+{
+  (void) p_Vid; // nothing to do
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Post-processing step after field decoding: halves p_Vid->number.
+ *
+ * \param p_Vid
+ *    decoder state whose 'number' member is updated in place
+ ************************************************************************
+ */
+void field_postprocessing(VideoParameters *p_Vid)
+{
+  p_Vid->number = p_Vid->number / 2;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Copy the picture-level attributes of *src into *dst
+ *    (used for 4:4:4 independent mode).
+ *
+ * \param p_Vid
+ *    decoder state; only read for the tone-mapping table size
+ * \param dst
+ *    picture receiving the attributes
+ * \param src
+ *    picture the attributes are taken from
+ ************************************************************************
+ */
+void copy_dec_picture_JV( VideoParameters *p_Vid, StorablePicture *dst, StorablePicture *src )
+{
+  int k;
+
+  // picture order counts
+  dst->poc        = src->poc;
+  dst->top_poc    = src->top_poc;
+  dst->bottom_poc = src->bottom_poc;
+  dst->frame_poc  = src->frame_poc;
+
+  // quantisation parameters
+  dst->qp             = src->qp;
+  dst->slice_qp_delta = src->slice_qp_delta;
+  for (k = 0; k < 2; ++k)
+  {
+    dst->chroma_qp_offset[k] = src->chroma_qp_offset[k];
+  }
+
+  // slice type and reference-marking state
+  dst->slice_type                      = src->slice_type;
+  dst->used_for_reference              = src->used_for_reference;
+  dst->idr_flag                        = src->idr_flag;
+  dst->no_output_of_prior_pics_flag    = src->no_output_of_prior_pics_flag;
+  dst->long_term_reference_flag        = src->long_term_reference_flag;
+  dst->adaptive_ref_pic_buffering_flag = src->adaptive_ref_pic_buffering_flag;
+  dst->dec_ref_pic_marking_buffer      = src->dec_ref_pic_marking_buffer;
+
+  // picture structure and numbering
+  dst->mb_aff_frame_flag   = src->mb_aff_frame_flag;
+  dst->PicWidthInMbs       = src->PicWidthInMbs;
+  dst->pic_num             = src->pic_num;
+  dst->frame_num           = src->frame_num;
+  dst->recovery_frame      = src->recovery_frame;
+  dst->coded_frame         = src->coded_frame;
+  dst->chroma_format_idc   = src->chroma_format_idc;
+  dst->frame_mbs_only_flag = src->frame_mbs_only_flag;
+
+  // cropping rectangle
+  dst->frame_cropping_flag               = src->frame_cropping_flag;
+  dst->frame_cropping_rect_left_offset   = src->frame_cropping_rect_left_offset;
+  dst->frame_cropping_rect_right_offset  = src->frame_cropping_rect_right_offset;
+  dst->frame_cropping_rect_top_offset    = src->frame_cropping_rect_top_offset;
+  dst->frame_cropping_rect_bottom_offset = src->frame_cropping_rect_bottom_offset;
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+  // tone-mapping SEI data kept with the picture
+  dst->seiHasTone_mapping    = src->seiHasTone_mapping;
+  dst->tone_mapping_model_id = src->tone_mapping_model_id;
+  dst->tonemapped_bit_depth  = src->tonemapped_bit_depth;
+  if (src->tone_mapping_lut != NULL)
+  {
+    const int lut_entries = 1 << p_Vid->seiToneMapping->coded_data_bit_depth;
+
+    // The table is allocated with int-sized entries, while the copy below
+    // moves imgpel-sized entries.
+    dst->tone_mapping_lut = malloc(sizeof(int) * lut_entries);
+    if (dst->tone_mapping_lut == NULL)
+    {
+      no_mem_exit("copy_dec_picture_JV: tone_mapping_lut");
+    }
+    memcpy(dst->tone_mapping_lut, src->tone_mapping_lut, sizeof(imgpel) * lut_entries);
+  }
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Decode every macroblock of one slice, then run the slice exit step.
+ *
+ * \param currSlice
+ *    slice to decode; its p_Vid member supplies the decoder state
+ ************************************************************************
+ */
+void decode_one_slice(Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  Macroblock *currMB = NULL;
+  Boolean end_of_slice = FALSE;
+
+  p_Vid->cod_counter = -1;
+
+  setup_slice_methods(currSlice);
+
+  if (IS_INDEPENDENT(p_Vid))
+  {
+    change_plane_JV(p_Vid, p_Vid->colour_plane_id);
+  }
+
+  set_ref_pic_num(p_Vid);
+
+  if (B_SLICE == currSlice->slice_type)
+  {
+    currSlice->compute_colocated(currSlice, currSlice->p_colocated, p_Vid->listX);
+  }
+
+  // Macroblock loop; at least one macroblock is always processed.
+  do
+  {
+    int eos_bit;
+
+#if TRACE
+    fprintf(p_trace,"\n*********** POC: %i (I/P) MB: %i Slice: %i Type %d **********\n", p_Vid->ThisPOC, p_Vid->current_mb_nr, p_Vid->current_slice_nr, currSlice->slice_type);
+#endif
+
+    // set up the macroblock, parse its syntax elements, reconstruct it
+    start_macroblock(currSlice, &currMB);
+    currSlice->read_one_macroblock(currMB);
+    decode_one_macroblock(currMB, p_Vid->dec_picture);
+
+    // For an MBAFF field macroblock the active reference counts are halved.
+    if (currSlice->mb_aff_frame_flag && p_Vid->dec_picture->motion.mb_field[p_Vid->current_mb_nr])
+    {
+      currSlice->num_ref_idx_l0_active >>= 1;
+      currSlice->num_ref_idx_l1_active >>= 1;
+    }
+
+    ercWriteMBMODEandMV(currMB);
+
+    // Non-zero for non-MBAFF slices, or for odd macroblock numbers in MBAFF.
+    eos_bit = (!currSlice->mb_aff_frame_flag || p_Vid->current_mb_nr % 2);
+    end_of_slice = exit_macroblock(currSlice, eos_bit);
+  } while (!end_of_slice);
+
+  exit_slice(currSlice, p_Vid->old_slice);
+}
diff --git a/Src/h264dec/ldecod/src/intra16x16_pred.c b/Src/h264dec/ldecod/src/intra16x16_pred.c
new file mode 100644
index 00000000..a2b89ca2
--- /dev/null
+++ b/Src/h264dec/ldecod/src/intra16x16_pred.c
@@ -0,0 +1,428 @@
+/*!
+ *************************************************************************************
+ * \file intra16x16_pred.c
+ *
+ * \brief
+ * Functions for intra 16x16 prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright,
+ * address and affiliation details)
+ * - Yuri Vatis
+ * - Jan Muenster
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *************************************************************************************
+ */
+#include "global.h"
+#include "intra16x16_pred.h"
+#include "mb_access.h"
+#include "image.h"
+#include <emmintrin.h>
+/* Fill a 16x16 prediction block with the single value 'pred'.
+ * Uses SSE2 stores when sse2_flag is set; otherwise MMX stores on
+ * _M_IX86 builds and a plain scalar loop everywhere else. */
+static void memset_16x16(h264_imgpel_macroblock_row_t *mb_pred, int pred)
+{
+  if (sse2_flag)
+  {
+    const __m128i fill = _mm_set1_epi8(pred);
+    __m128i *dst = (__m128i *)mb_pred;
+    int n;
+    // sixteen 128-bit stores
+    for (n = 0; n < 16; ++n)
+    {
+      _mm_store_si128(dst + n, fill);
+    }
+    return;
+  }
+#ifdef _M_IX86
+  {
+    const __m64 fill = _mm_set1_pi8(pred);
+    __m64 *dst = (__m64 *)mb_pred;
+    int n;
+    // thirty-two 64-bit stores
+    for (n = 0; n < 32; ++n)
+    {
+      dst[n] = fill;
+    }
+  }
+#else
+  {
+    int row, col;
+    for (row = 0; row < MB_BLOCK_SIZE; ++row)
+    {
+      for (col = 0; col < MB_BLOCK_SIZE; ++col)
+      {
+        mb_pred[row][col] = (imgpel) pred;
+      }
+    }
+  }
+#endif
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 16x16 DC intra prediction for plane 'pl' in
+ *    currSlice->mb_pred[pl].
+ *
+ *    The DC value is the rounded mean of the available row above and/or
+ *    column to the left; if neither is available the per-component
+ *    default dc_pred_value_comp[pl] is used.
+ *
+ * \param currMB
+ *    current macroblock
+ * \param pl
+ *    colour plane
+ *
+ * \return
+ *    DECODING_OK
+ ***********************************************************************
+ */
+static inline int intra16x16_dc_pred(Macroblock *currMB,
+                                     ColorPlane pl)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+  PixelPos up;        //!< pixel position p(0,-1)
+  PixelPos left[17];  //!< pixel positions p(-1, -1..15)
+
+  int sum_up = 0, sum_left = 0;
+  int dc;
+  int up_avail, left_avail;
+  int k;
+
+  // collect neighbour positions: corner, first left pixel, remaining left pixels, up
+  p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]);
+  p_Vid->getNeighbourLeftLuma(currMB, &left[1]);
+  for (k = 2; k < 17; ++k)
+  {
+    p_Vid->getNeighbourNPLumaNB(currMB, k - 1, &left[k]);
+  }
+  p_Vid->getNeighbourUpLuma(currMB, &up);
+
+  if (p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    // with constrained intra prediction the intra_block[] entry of each neighbour decides
+    up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
+    left_avail = 1;
+    for (k = 1; k < 17; ++k)
+    {
+      left_avail &= left[k].available ? p_Vid->intra_block[left[k].mb_addr] : 0;
+    }
+  }
+  else
+  {
+    up_avail   = up.available;
+    left_avail = left[1].available;
+  }
+
+  if (up_avail)
+  {
+    const imgpel *above = &plane[up.pos_y][up.pos_x];
+    for (k = 0; k < 16; ++k)
+    {
+      sum_up += above[k];   // sum hor pix
+    }
+  }
+
+  if (left_avail)
+  {
+    for (k = 1; k < 17; ++k)
+    {
+      sum_left += plane[left[k].pos_y][left[k].pos_x];   // sum vert pix
+    }
+  }
+
+  if (left_avail)
+  {
+    if (up_avail)
+      dc = (sum_up + sum_left + 16) >> 5;   // no edge
+    else
+      dc = (sum_left + 8) >> 4;             // upper edge
+  }
+  else if (up_avail)
+  {
+    dc = (sum_up + 8) >> 4;                 // left edge
+  }
+  else
+  {
+    dc = p_Vid->dc_pred_value_comp[pl];     // top left corner, nothing to predict from
+  }
+
+  memset_16x16(currSlice->mb_pred[pl], dc);
+
+  return DECODING_OK;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 16x16 vertical intra prediction for plane 'pl': every
+ *    row of currSlice->mb_pred[pl] is a copy of the 16 pixels above the
+ *    macroblock.
+ *
+ *    error() is called when the upper neighbour is not usable.
+ *
+ * \param currMB
+ *    current macroblock
+ * \param pl
+ *    colour plane
+ *
+ * \return
+ *    DECODING_OK
+ ***********************************************************************
+ */
+static inline int intra16x16_vert_pred(Macroblock *currMB,
+                                       ColorPlane pl)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel *above;
+
+  PixelPos up;  //!< pixel position p(0,-1)
+  int up_avail;
+  int row;
+
+  p_Vid->getNeighbourUpLuma(currMB, &up);
+
+  if (p_Vid->active_pps->constrained_intra_pred_flag)
+    up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
+  else
+    up_avail = up.available;
+
+  if (!up_avail)
+  {
+    error ("invalid 16x16 intra pred Mode VERT_PRED_16",500);
+  }
+
+  above = &(plane[up.pos_y][up.pos_x]);
+  for (row = 0; row < MB_BLOCK_SIZE; ++row)
+  {
+    // TODO; take advantage of imgY's stride
+    memcpy(&currSlice->mb_pred[pl][row][0], above, MB_BLOCK_SIZE * sizeof(imgpel));
+  }
+
+  return DECODING_OK;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 16x16 horizontal intra prediction for plane 'pl': every
+ *    row of currSlice->mb_pred[pl] is filled with the pixel to the left
+ *    of that row.
+ *
+ *    error() is called when the left neighbour is not usable.
+ *
+ * \param currMB
+ *    current macroblock
+ * \param pl
+ *    colour plane
+ *
+ * \return
+ *    DECODING_OK
+ ***********************************************************************
+ */
+static inline int intra16x16_hor_pred(Macroblock *currMB,
+                                      ColorPlane pl)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+  PixelPos left[17];  //!< pixel positions p(-1, -1..15)
+  int left_avail;
+  int row, col, k;
+
+  for (k = 0; k < 17; ++k)
+  {
+    p_Vid->getNeighbourNXLuma(currMB, k - 1, &left[k]);
+  }
+
+  if (p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    left_avail = 1;
+    for (k = 1; k < 17; ++k)
+    {
+      left_avail &= left[k].available ? p_Vid->intra_block[left[k].mb_addr] : 0;
+    }
+  }
+  else
+  {
+    left_avail = left[1].available;
+  }
+
+  if (!left_avail)
+  {
+    error ("invalid 16x16 intra pred Mode HOR_PRED_16",500);
+  }
+
+  for (row = 0; row < MB_BLOCK_SIZE; ++row)
+  {
+    const imgpel left_pel = plane[left[row + 1].pos_y][left[row + 1].pos_x];
+    imgpel *dst = &currSlice->mb_pred[pl][row][0];
+    for (col = 0; col < MB_BLOCK_SIZE; ++col)
+    {
+      dst[col] = left_pel;   // store predicted 16x16 block
+    }
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ * SSE2 helper that writes a 16x16 plane prediction into dest.
+ * For row j and column i the stored value is
+ * (iaa + (j - 7) * ic + (i - 7) * ib + 16) >> 5,
+ * evaluated in signed 16-bit lanes and packed to bytes with unsigned
+ * saturation.
+ *
+ * \param dest
+ * destination rows; each row receives one aligned 128-bit store
+ * \param iaa
+ * constant term of the plane equation
+ * \param ib
+ * per-column coefficient
+ * \param ic
+ * per-row coefficient
+ *
+ * NOTE(review): 16 bytes are written per row, so this presumably relies
+ * on imgpel being 8 bits wide and on 16-byte aligned rows - confirm.
+ *
+ ***********************************************************************
+ */
+static void planeset(h264_imgpel_macroblock_row_t *dest, int iaa, int ib, int ic)
+{
+ int j;
+ __m128i i0_7 = _mm_setr_epi16(-7,-6,-5,-4,-3,-2,-1, 0); // (i - 7) for columns 0..7
+ __m128i i8_15 = _mm_setr_epi16(1,2,3,4,5,6,7,8); // (i - 7) for columns 8..15
+ __m128i xmm_ib = _mm_set1_epi16(ib);
+ int j7ic = iaa + -7 * ic + 16; // row term for j = 0, rounding offset included
+ i0_7 = _mm_mullo_epi16(i0_7, xmm_ib); // (i - 7) * ib, columns 0..7
+ i8_15 = _mm_mullo_epi16(i8_15, xmm_ib); // (i - 7) * ib, columns 8..15
+ for (j = 0;j < MB_BLOCK_SIZE; ++j)
+ {
+ __m128i xmm_j7ic = _mm_set1_epi16(j7ic); // broadcast the row term to all lanes
+ __m128i xmm_lo = _mm_add_epi16(i0_7, xmm_j7ic);
+ __m128i xmm_hi = _mm_add_epi16(i8_15, xmm_j7ic);
+ __m128i xmm_store;
+ xmm_lo = _mm_srai_epi16(xmm_lo, 5); // arithmetic >> 5
+ xmm_hi = _mm_srai_epi16(xmm_hi, 5);
+ xmm_store = _mm_packus_epi16(xmm_lo, xmm_hi); // saturate 16-bit lanes to 0..255 bytes
+ _mm_store_si128((__m128i *)dest[j], xmm_store); // aligned store of one row
+ j7ic += ic; // advance the row term to row j + 1
+ }// store plane prediction
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 16x16 plane intra prediction for plane 'pl' in
+ *    currSlice->mb_pred[pl].
+ *
+ *    Horizontal and vertical gradients are derived from the row above
+ *    and the column to the left (including the corner pixel); the block
+ *    is then filled by planeset() when sse2_flag is set, or by a scalar
+ *    loop with clipping otherwise.
+ *
+ *    error() is called when the up, left or up-left neighbour is not
+ *    usable.
+ *
+ * \param currMB
+ *    current macroblock
+ * \param pl
+ *    colour plane
+ *
+ * \return
+ *    DECODING_OK
+ ***********************************************************************
+ */
+static inline int intra16x16_plane_pred(Macroblock *currMB,
+                                        ColorPlane pl)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel *above_mid;
+  int max_imgpel_value = p_Vid->max_pel_value_comp[pl];
+
+  PixelPos up;        //!< pixel position p(0,-1)
+  PixelPos left[17];  //!< pixel positions p(-1, -1..15)
+
+  int up_avail, left_avail, left_up_avail;
+  int grad_h = 0, grad_v = 0;
+  int ib, ic, iaa;
+  int corner, bottom_left;
+  int k;
+
+  p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]);
+  p_Vid->getNeighbourLeftLuma(currMB, &left[1]);
+  for (k = 2; k < 17; ++k)
+  {
+    p_Vid->getNeighbourNPLumaNB(currMB, k - 1, &left[k]);
+  }
+  p_Vid->getNeighbourUpLuma(currMB, &up);
+
+  if (p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
+    left_avail = 1;
+    for (k = 1; k < 17; ++k)
+    {
+      left_avail &= left[k].available ? p_Vid->intra_block[left[k].mb_addr] : 0;
+    }
+    left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr] : 0;
+  }
+  else
+  {
+    up_avail      = up.available;
+    left_avail    = left[1].available;
+    left_up_avail = left[0].available;
+  }
+
+  if (!up_avail || !left_up_avail || !left_avail)
+  {
+    error ("invalid 16x16 intra pred Mode PLANE_16",500);
+  }
+
+  // above_mid[0] is the pixel above column 7, so above_mid[8] is above column 15
+  above_mid = &plane[up.pos_y][up.pos_x + 7];
+  for (k = 1; k < 8; ++k)
+  {
+    grad_h += k * (above_mid[k] - above_mid[-k]);
+    grad_v += k * (plane[left[8 + k].pos_y][left[8 + k].pos_x] - plane[left[8 - k].pos_y][left[8 - k].pos_x]);
+  }
+
+  // the outermost taps use the corner pixel p(-1,-1)
+  corner      = plane[left[0].pos_y][left[0].pos_x];
+  bottom_left = plane[left[16].pos_y][left[16].pos_x];
+  grad_h += 8 * (above_mid[8] - corner);
+  grad_v += 8 * (bottom_left - corner);
+
+  ib = (5 * grad_h + 32) >> 6;
+  ic = (5 * grad_v + 32) >> 6;
+
+  iaa = 16 * (above_mid[8] + bottom_left);
+
+  if (sse2_flag)
+  {
+    planeset(currSlice->mb_pred[pl], iaa, ib, ic);
+  }
+  else
+  {
+    // TODO: MMX
+    int row, col;
+    for (row = 0; row < MB_BLOCK_SIZE; ++row)
+    {
+      const int row_term = iaa + (row - 7) * ic + 16;
+      for (col = 0; col < MB_BLOCK_SIZE; ++col)
+      {
+        // store plane prediction
+        currSlice->mb_pred[pl][row][col] = (imgpel) iClip1(max_imgpel_value, (((col - 7) * ib + row_term) >> 5));
+      }
+    }
+  }
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Dispatch to the 16x16 intra predictor selected by 'predmode'.
+ *
+ * \return
+ *    result of the selected predictor (DECODING_OK) \n
+ *    SEARCH_SYNC if 'predmode' is not a known 16x16 mode; a message is
+ *    printed in that case
+ ***********************************************************************
+ */
+// TODO: replace with ippiPredictIntra_16x16_H264_8u_C1IR ?
+int intrapred16x16(Macroblock *currMB,  //!< Current Macroblock
+                   ColorPlane pl,       //!< Current colorplane (for 4:4:4)
+                   int predmode)        //!< prediction mode
+{
+  int status;
+
+  switch (predmode)
+  {
+  case VERT_PRED_16:  // vertical prediction from block above
+    status = intra16x16_vert_pred(currMB, pl);
+    break;
+  case HOR_PRED_16:   // horizontal prediction from left block
+    status = intra16x16_hor_pred(currMB, pl);
+    break;
+  case DC_PRED_16:    // DC prediction
+    status = intra16x16_dc_pred(currMB, pl);
+    break;
+  case PLANE_16:      // plane prediction
+    status = intra16x16_plane_pred(currMB, pl);
+    break;
+  default:
+    // indication of fault in bitstream
+    printf("illegal 16x16 intra prediction mode input: %d\n",predmode);
+    status = SEARCH_SYNC;
+    break;
+  }
+
+  return status;
+}
+
diff --git a/Src/h264dec/ldecod/src/intra4x4_pred.c b/Src/h264dec/ldecod/src/intra4x4_pred.c
new file mode 100644
index 00000000..21f16ed6
--- /dev/null
+++ b/Src/h264dec/ldecod/src/intra4x4_pred.c
@@ -0,0 +1,854 @@
+/*!
+ *************************************************************************************
+ * \file intra4x4_pred.c
+ *
+ * \brief
+ * Functions for intra 4x4 prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright,
+ * address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *************************************************************************************
+ */
+#include "global.h"
+#include "intra4x4_pred.h"
+#include "mb_access.h"
+#include "image.h"
+
+// Notation for comments regarding prediction and predictors.
+// The pels of the 4x4 block are labelled a..p. The predictor pels above
+// are labelled A..H, from the left I..L, and from above left X, as follows:
+//
+// X A B C D E F G H
+// I a b c d
+// J e f g h
+// K i j k l
+// L m n o p
+//
+
+// Predictor array index definitions: each macro names one slot of a local
+// array called PredPel, which must be in scope wherever the macro is used.
+#define P_X (PredPel[0]) // above-left pel X
+#define P_A (PredPel[1]) // above pels A..D
+#define P_B (PredPel[2])
+#define P_C (PredPel[3])
+#define P_D (PredPel[4])
+#define P_E (PredPel[5]) // above-right pels E..H
+#define P_F (PredPel[6])
+#define P_G (PredPel[7])
+#define P_H (PredPel[8])
+#define P_I (PredPel[9]) // left pels I..L
+#define P_J (PredPel[10])
+#define P_K (PredPel[11])
+#define P_L (PredPel[12])
+
+/* Fill the 4x4 block that starts at column offset_x of rows mb_pred[0..3]
+ * with the single value 'pred'. */
+static void memset_4x4(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, int pred)
+{
+#ifdef _M_IX86
+  // Hand-written variant: replicate the byte into a 32-bit word and write
+  // one word per row instead of relying on compiler-generated code.
+  const int packed = pred * 0x01010101;
+  h264_imgpel_macroblock_row_t *shifted = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x];
+  int row;
+  for (row = 0; row < 4; ++row)
+  {
+    *(int *)shifted[row] = packed;
+  }
+#else
+  int row, col;
+  for (row = 0; row < BLOCK_SIZE; ++row)
+  {
+    imgpel *dst = &mb_pred[row][offset_x];
+    for (col = 0; col < BLOCK_SIZE; ++col)
+    {
+      dst[col] = (imgpel) pred;
+    }
+  }
+#endif
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 4x4 DC intra prediction: the block is filled with the
+ *    rounded mean of the available pels above and/or to the left, or
+ *    with dc_pred_value_comp[pl] when neither side is available.
+ *
+ * \param currMB
+ *    current MB structure
+ * \param pl
+ *    color plane
+ * \param ioff
+ *    pixel offset X within MB
+ * \param joff
+ *    pixel offset Y within MB
+ * \return
+ *    DECODING_OK
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_dc_pred(Macroblock *currMB,
+                                   ColorPlane pl,
+                                   int ioff,
+                                   int joff)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+  PixelPos pix_a[4], pix_b;
+  int up_ok, left_ok;
+  int dc = 0;
+  int k;
+
+  // left column (4 pels), then the pel above the block
+  for (k = 0; k < 4; ++k)
+  {
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + k, &pix_a[k]);
+  }
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &pix_b);
+
+  if (!p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    left_ok = pix_a[0].available;
+    up_ok   = pix_b.available;
+  }
+  else
+  {
+    left_ok = 1;
+    for (k = 0; k < 4; ++k)
+    {
+      left_ok &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+    }
+    up_ok = pix_b.available ? p_Vid->intra_block[pix_b.mb_addr] : 0;
+  }
+
+  // TODO: ippiPredictIntra_4x4_H264_8u_C1IR
+  // accumulate the usable predictor pels
+  if (up_ok)
+  {
+    const imgpel *above = &plane[pix_b.pos_y][pix_b.pos_x];
+    for (k = 0; k < 4; ++k)
+    {
+      dc += above[k];
+    }
+  }
+
+  if (left_ok)
+  {
+    for (k = 0; k < 4; ++k)
+    {
+      dc += plane[pix_a[k].pos_y][pix_a[k].pos_x];
+    }
+  }
+
+  if (up_ok && left_ok)
+  {
+    dc = (dc + 4) >> 3;                   // no edge: mean of 8 pels
+  }
+  else if (up_ok || left_ok)
+  {
+    dc = (dc + 2) >> 2;                   // upper or left edge: mean of 4 pels
+  }
+  else
+  {
+    dc = p_Vid->dc_pred_value_comp[pl];   // top left corner, nothing to predict from
+  }
+
+  memset_4x4(&currSlice->mb_pred[pl][joff], ioff, dc);
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 4x4 vertical intra prediction: each of the four rows is
+ *    a copy of the four pels above the block.
+ *
+ *    With H264_WARNINGS defined, a warning is printed when the upper
+ *    neighbour is not usable; the prediction is formed regardless.
+ *
+ * \return
+ *    DECODING_OK
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_vert_pred(Macroblock *currMB,    //!< current macroblock
+                                     ColorPlane pl,         //!< current image plane
+                                     int ioff,              //!< pixel offset X within MB
+                                     int joff)              //!< pixel offset Y within MB
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel *above;
+  PixelPos pix_b;
+  int row;
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1 , &pix_b);
+#ifdef H264_WARNINGS
+  {
+    int up_ok = pix_b.available;
+
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+    {
+      up_ok = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+    }
+
+    if (!up_ok)
+      printf ("warning: Intra_4x4_Vertical prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+  }
+#endif
+  above = &(plane[pix_b.pos_y][pix_b.pos_x]);
+
+  /* store predicted 4x4 block */
+  for (row = 0; row < 4; ++row)
+  {
+    memcpy(&(currSlice->mb_pred[pl][joff + row][ioff]), above, BLOCK_SIZE * sizeof(imgpel));
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 4x4 horizontal intra prediction: each row is filled with
+ *    the pel to the left of that row.
+ *
+ *    With H264_WARNINGS defined, a warning is printed when the left
+ *    neighbour is not usable; the prediction is formed regardless.
+ *
+ * \param currMB
+ *    current MB structure
+ * \param pl
+ *    color plane
+ * \param ioff
+ *    pixel offset X within MB
+ * \param joff
+ *    pixel offset Y within MB
+ *
+ * \return
+ *    DECODING_OK
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_hor_pred(Macroblock *currMB,
+                                    ColorPlane pl,
+                                    int ioff,
+                                    int joff)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+  PixelPos pix_a[4];
+  int row, col;
+
+  for (row = 0; row < 4; ++row)
+  {
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + row , &pix_a[row]);
+  }
+
+#ifdef H264_WARNINGS
+  {
+    int left_ok;
+
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+    {
+      int k;
+      left_ok = 1;
+      for (k = 0; k < 4; ++k)
+      {
+        left_ok &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+      }
+    }
+    else
+    {
+      left_ok = pix_a[0].available;
+    }
+
+    if (!left_ok)
+      printf ("warning: Intra_4x4_Horizontal prediction mode not allowed at mb %d\n",(int) p_Vid->current_mb_nr);
+  }
+#endif
+  for (row = 0; row < BLOCK_SIZE; ++row)
+  {
+    // TODO: write using imgY's stride
+    const imgpel left_pel = plane[pix_a[row].pos_y][pix_a[row].pos_x];
+    imgpel *dst = &currSlice->mb_pred[pl][joff + row][ioff];
+    for (col = 0; col < 4; ++col)
+    {
+      dst[col] = left_pel;   /* store predicted 4x4 block */
+    }
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 4x4 diagonal-down-right intra prediction from the pels
+ *    above (A..D), to the left (I..L) and above-left (X).
+ *
+ *    All positions with the same (x - y) share one 3-tap filtered
+ *    value. With H264_WARNINGS defined, a warning is printed when a
+ *    required neighbour is not usable; the prediction is formed
+ *    regardless.
+ *
+ * \return
+ *    DECODING_OK
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_diag_down_right_pred(Macroblock *currMB,    //!< current macroblock
+                                                ColorPlane pl,         //!< current image plane
+                                                int ioff,              //!< pixel offset X within MB
+                                                int joff)              //!< pixel offset Y within MB
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel PredPel[13];  // array of predictor pels (name required by the P_* macros)
+  imgpel diag[7];      // filtered value for x - y = -3 .. +3
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+  PixelPos pix_a[4];
+  PixelPos pix_b, pix_d;
+  int x, y;
+
+  for (y = 0; y < 4; ++y)
+  {
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1 , joff + y , &pix_a[y]);
+  }
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b);
+  p_Vid->getNeighbourLuma(currMB, ioff -1 , joff -1 , &pix_d);
+#ifdef H264_WARNINGS
+  {
+    int up_ok, left_ok, up_left_ok;
+
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+    {
+      int k;
+      left_ok = 1;
+      for (k = 0; k < 4; ++k)
+      {
+        left_ok &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+      }
+      up_ok      = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+      up_left_ok = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0;
+    }
+    else
+    {
+      left_ok    = pix_a[0].available;
+      up_ok      = pix_b.available;
+      up_left_ok = pix_d.available;
+    }
+
+    if ((!up_ok)||(!left_ok)||(!up_left_ok))
+      printf ("warning: Intra_4x4_Diagonal_Down_Right prediction mode not allowed at mb %d\n",(int) p_Vid->current_mb_nr);
+  }
+#endif
+  // form predictor pels
+  P_A = plane[pix_b.pos_y][pix_b.pos_x + 0];
+  P_B = plane[pix_b.pos_y][pix_b.pos_x + 1];
+  P_C = plane[pix_b.pos_y][pix_b.pos_x + 2];
+  P_D = plane[pix_b.pos_y][pix_b.pos_x + 3];
+
+  P_I = plane[pix_a[0].pos_y][pix_a[0].pos_x];
+  P_J = plane[pix_a[1].pos_y][pix_a[1].pos_x];
+  P_K = plane[pix_a[2].pos_y][pix_a[2].pos_x];
+  P_L = plane[pix_a[3].pos_y][pix_a[3].pos_x];
+
+  P_X = plane[pix_d.pos_y][pix_d.pos_x];
+
+  // one filtered value per diagonal, from bottom-left to top-right
+  diag[0] = (imgpel) ((P_L + 2*P_K + P_J + 2) >> 2);
+  diag[1] = (imgpel) ((P_K + 2*P_J + P_I + 2) >> 2);
+  diag[2] = (imgpel) ((P_J + 2*P_I + P_X + 2) >> 2);
+  diag[3] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2);
+  diag[4] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2);
+  diag[5] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+  diag[6] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2);
+
+  for (y = 0; y < 4; ++y)
+  {
+    for (x = 0; x < 4; ++x)
+    {
+      currSlice->mb_pred[pl][joff + y][ioff + x] = diag[x - y + 3];
+    }
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Build the 4x4 diagonal-down-left intra prediction from the pels
+ *    above (A..D) and above-right (E..H).
+ *
+ *    When the above-right pels are not usable, E..H are replaced by D.
+ *    All positions with the same (x + y) share one filtered value.
+ *    With H264_WARNINGS defined, a warning is printed when the upper
+ *    neighbour is not usable; the prediction is formed regardless.
+ *
+ * \return
+ *    DECODING_OK
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_diag_down_left_pred(Macroblock *currMB,    //!< current macroblock
+                                               ColorPlane pl,         //!< current image plane
+                                               int ioff,              //!< pixel offset X within MB
+                                               int joff)              //!< pixel offset Y within MB
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  imgpel PredPel[13];  // array of predictor pels (name required by the P_* macros)
+  imgpel diag[7];      // filtered value for x + y = 0 .. 6
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+  PixelPos pix_b, pix_c;
+  int up_right_ok;
+#ifdef H264_WARNINGS
+  int up_ok;
+#endif
+  int x, y;
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b);
+  p_Vid->getNeighbourLuma(currMB, ioff + 4, joff - 1, &pix_c);
+
+  // the above-right block is treated as unavailable for ioff == 4 with joff == 4 or 12
+  pix_c.available = pix_c.available && !((ioff==4) && ((joff==4)||(joff==12)));
+
+  if (p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+#ifdef H264_WARNINGS
+    up_ok = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+#endif
+    up_right_ok = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0;
+  }
+  else
+  {
+#ifdef H264_WARNINGS
+    up_ok = pix_b.available;
+#endif
+    up_right_ok = pix_c.available;
+  }
+#ifdef H264_WARNINGS
+  if (!up_ok)
+    printf ("warning: Intra_4x4_Diagonal_Down_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+#endif
+
+  // form predictor pels
+  P_A = plane[pix_b.pos_y][pix_b.pos_x + 0];
+  P_B = plane[pix_b.pos_y][pix_b.pos_x + 1];
+  P_C = plane[pix_b.pos_y][pix_b.pos_x + 2];
+  P_D = plane[pix_b.pos_y][pix_b.pos_x + 3];
+
+  if (up_right_ok)
+  {
+    P_E = plane[pix_c.pos_y][pix_c.pos_x + 0];
+    P_F = plane[pix_c.pos_y][pix_c.pos_x + 1];
+    P_G = plane[pix_c.pos_y][pix_c.pos_x + 2];
+    P_H = plane[pix_c.pos_y][pix_c.pos_x + 3];
+  }
+  else
+  {
+    P_E = P_F = P_G = P_H = P_D;
+  }
+
+  // one filtered value per anti-diagonal, from top-left to bottom-right
+  diag[0] = (imgpel) ((P_A + P_C + 2*(P_B) + 2) >> 2);
+  diag[1] = (imgpel) ((P_B + P_D + 2*(P_C) + 2) >> 2);
+  diag[2] = (imgpel) ((P_C + P_E + 2*(P_D) + 2) >> 2);
+  diag[3] = (imgpel) ((P_D + P_F + 2*(P_E) + 2) >> 2);
+  diag[4] = (imgpel) ((P_E + P_G + 2*(P_F) + 2) >> 2);
+  diag[5] = (imgpel) ((P_F + P_H + 2*(P_G) + 2) >> 2);
+  diag[6] = (imgpel) ((P_G + 3*(P_H) + 2) >> 2);
+
+  for (y = 0; y < 4; ++y)
+  {
+    for (x = 0; x < 4; ++x)
+    {
+      currSlice->mb_pred[pl][joff + y][ioff + x] = diag[x + y];
+    }
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Builds the Intra 4x4 vertical-right prediction for the block at
+ *    (ioff, joff) and stores it in currSlice->mb_pred[pl].
+ *
+ *    Uses the pels above (A..D), to the left (I..L) and above left (X).
+ *    Rows 0 and 1 are computed from X, A..D (and I); rows 2 and 3 are
+ *    rows 0 and 1 shifted right by one pel, with a new left-most pel
+ *    derived from X, I, J, K.
+ *
+ * \return
+ *    DECODING_OK (always)
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_vert_right_pred(Macroblock *currMB,    //!< current macroblock
+                                           ColorPlane pl,         //!< current image plane
+                                           int ioff,              //!< pixel offset X within MB
+                                           int joff)              //!< pixel offset Y within MB
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel PredPel[13];  // predictor pels, accessed through the P_x macros
+  imgpel even_row[4];  // 2-tap averages: row 0, reused (shifted) in row 2
+  imgpel odd_row[4];   // 3-tap values:   row 1, reused (shifted) in row 3
+  PixelPos left[4];
+  PixelPos above, above_left;
+  int k;
+
+  for (k = 0; k < 4; ++k)
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + k, &left[k]);
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &above);
+  p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &above_left);
+
+#ifdef H264_WARNINGS
+  {
+    int left_ok;
+    int above_ok;
+    int above_left_ok;
+
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+    {
+      // with constrained intra prediction only intra-coded neighbours count
+      left_ok = 1;
+      for (k = 0; k < 4; ++k)
+        left_ok &= left[k].available ? p_Vid->intra_block[left[k].mb_addr] : 0;
+      above_ok      = above.available      ? p_Vid->intra_block[above.mb_addr]      : 0;
+      above_left_ok = above_left.available ? p_Vid->intra_block[above_left.mb_addr] : 0;
+    }
+    else
+    {
+      left_ok       = left[0].available;
+      above_ok      = above.available;
+      above_left_ok = above_left.available;
+    }
+
+    if (!(above_ok && left_ok && above_left_ok))
+      printf ("warning: Intra_4x4_Vertical_Right prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+  }
+#endif
+
+  // gather the predictor pels
+  P_A = plane[above.pos_y][above.pos_x + 0];
+  P_B = plane[above.pos_y][above.pos_x + 1];
+  P_C = plane[above.pos_y][above.pos_x + 2];
+  P_D = plane[above.pos_y][above.pos_x + 3];
+
+  P_I = plane[left[0].pos_y][left[0].pos_x];
+  P_J = plane[left[1].pos_y][left[1].pos_x];
+  P_K = plane[left[2].pos_y][left[2].pos_x];
+  P_L = plane[left[3].pos_y][left[3].pos_x];
+
+  P_X = plane[above_left.pos_y][above_left.pos_x];
+
+  even_row[0] = (imgpel) ((P_X + P_A + 1) >> 1);
+  even_row[1] = (imgpel) ((P_A + P_B + 1) >> 1);
+  even_row[2] = (imgpel) ((P_B + P_C + 1) >> 1);
+  even_row[3] = (imgpel) ((P_C + P_D + 1) >> 1);
+
+  odd_row[0] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2);
+  odd_row[1] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2);
+  odd_row[2] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+  odd_row[3] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2);
+
+  // rows 0 and 1
+  for (k = 0; k < 4; ++k)
+  {
+    currSlice->mb_pred[pl][joff + 0][ioff + k] = even_row[k];
+    currSlice->mb_pred[pl][joff + 1][ioff + k] = odd_row[k];
+  }
+
+  // rows 2 and 3: new left-most pel, then rows 0 and 1 shifted right by one
+  currSlice->mb_pred[pl][joff + 2][ioff + 0] = (imgpel) ((P_X + 2*P_I + P_J + 2) >> 2);
+  currSlice->mb_pred[pl][joff + 3][ioff + 0] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2);
+  for (k = 1; k < 4; ++k)
+  {
+    currSlice->mb_pred[pl][joff + 2][ioff + k] = even_row[k - 1];
+    currSlice->mb_pred[pl][joff + 3][ioff + k] = odd_row[k - 1];
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ * makes and returns 4x4 vertical left prediction mode
+ *
+ * \return
+ * DECODING_OK decoding of intraprediction mode was sucessfull \n
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_vert_left_pred(Macroblock *currMB, //!< current macroblock
+ ColorPlane pl, //!< current image plane
+ int ioff, //!< pixel offset X within MB
+ int joff) //!< pixel offset Y within MB
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ imgpel PredPel[13]; // array of predictor pels
+ imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+ PixelPos pix_b, pix_c;
+
+ int block_available_up;
+ int block_available_up_right;
+
+ p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b);
+ p_Vid->getNeighbourLuma(currMB, ioff +4 , joff -1 , &pix_c);
+
+ pix_c.available = pix_c.available && !((ioff==4) && ((joff==4)||(joff==12)));
+
+ if (p_Vid->active_pps->constrained_intra_pred_flag)
+ {
+ block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+ block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0;
+ }
+ else
+ {
+ block_available_up = pix_b.available;
+ block_available_up_right = pix_c.available;
+ }
+
+
+ if (!block_available_up)
+ printf ("warning: Intra_4x4_Vertical_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+
+ // form predictor pels
+ P_A = imgY[pix_b.pos_y][pix_b.pos_x + 0];
+ P_B = imgY[pix_b.pos_y][pix_b.pos_x + 1];
+ P_C = imgY[pix_b.pos_y][pix_b.pos_x + 2];
+ P_D = imgY[pix_b.pos_y][pix_b.pos_x + 3];
+
+ if (block_available_up_right)
+ {
+ P_E = imgY[pix_c.pos_y][pix_c.pos_x + 0];
+ P_F = imgY[pix_c.pos_y][pix_c.pos_x + 1];
+ P_G = imgY[pix_c.pos_y][pix_c.pos_x + 2];
+ P_H = imgY[pix_c.pos_y][pix_c.pos_x + 3];
+ }
+ else
+ {
+ P_E = P_F = P_G = P_H = P_D;
+ }
+
+ currSlice->mb_pred[pl][joff+0][ioff+0] = (imgpel) ((P_A + P_B + 1) >> 1);
+ currSlice->mb_pred[pl][joff+0][ioff+1] =
+ currSlice->mb_pred[pl][joff+2][ioff+0] = (imgpel) ((P_B + P_C + 1) >> 1);
+ currSlice->mb_pred[pl][joff+0][ioff+2] =
+ currSlice->mb_pred[pl][joff+2][ioff+1] = (imgpel) ((P_C + P_D + 1) >> 1);
+ currSlice->mb_pred[pl][joff+0][ioff+3] =
+ currSlice->mb_pred[pl][joff+2][ioff+2] = (imgpel) ((P_D + P_E + 1) >> 1);
+ currSlice->mb_pred[pl][joff+2][ioff+3] = (imgpel) ((P_E + P_F + 1) >> 1);
+ currSlice->mb_pred[pl][joff+1][ioff+0] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+ currSlice->mb_pred[pl][joff+1][ioff+1] =
+ currSlice->mb_pred[pl][joff+3][ioff+0] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2);
+ currSlice->mb_pred[pl][joff+1][ioff+2] =
+ currSlice->mb_pred[pl][joff+3][ioff+1] = (imgpel) ((P_C + 2*P_D + P_E + 2) >> 2);
+ currSlice->mb_pred[pl][joff+1][ioff+3] =
+ currSlice->mb_pred[pl][joff+3][ioff+2] = (imgpel) ((P_D + 2*P_E + P_F + 2) >> 2);
+ currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) ((P_E + 2*P_F + P_G + 2) >> 2);
+
+ return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Builds the Intra 4x4 horizontal-up prediction for the block at
+ *    (ioff, joff) and stores it in currSlice->mb_pred[pl].
+ *
+ *    Only the four pels to the left of the block (I..L) are used. The
+ *    predicted pel at (x, y) depends only on x + 2*y; positions with
+ *    x + 2*y > 5 are a plain copy of L.
+ *
+ * \return
+ *    DECODING_OK (always)
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_hor_up_pred(Macroblock *currMB,    //!< current macroblock
+                                       ColorPlane pl,         //!< current image plane
+                                       int ioff,              //!< pixel offset X within MB
+                                       int joff)              //!< pixel offset Y within MB
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  imgpel **plane = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel PredPel[13];  // predictor pels, accessed through the P_x macros
+  imgpel zig[10];      // predicted value indexed by x + 2*y
+  PixelPos left[4];
+  int x, y;
+
+  for (y = 0; y < 4; ++y)
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + y, &left[y]);
+
+#ifdef H264_WARNINGS
+  {
+    int left_ok;
+
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+    {
+      // with constrained intra prediction only intra-coded neighbours count
+      left_ok = 1;
+      for (y = 0; y < 4; ++y)
+        left_ok &= left[y].available ? p_Vid->intra_block[left[y].mb_addr] : 0;
+    }
+    else
+    {
+      left_ok = left[0].available;
+    }
+
+    if (!left_ok)
+      printf ("warning: Intra_4x4_Horizontal_Up prediction mode not allowed at mb %d\n",(int) p_Vid->current_mb_nr);
+  }
+#endif
+
+  // gather the predictor pels
+  P_I = plane[left[0].pos_y][left[0].pos_x];
+  P_J = plane[left[1].pos_y][left[1].pos_x];
+  P_K = plane[left[2].pos_y][left[2].pos_x];
+  P_L = plane[left[3].pos_y][left[3].pos_x];
+
+  // even entries: 2-tap averages, odd entries: 3-tap (1,2,1) values
+  zig[0] = (imgpel) ((P_I + P_J + 1) >> 1);
+  zig[1] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2);
+  zig[2] = (imgpel) ((P_J + P_K + 1) >> 1);
+  zig[3] = (imgpel) ((P_J + 2*P_K + P_L + 2) >> 2);
+  zig[4] = (imgpel) ((P_K + P_L + 1) >> 1);
+  zig[5] = (imgpel) ((P_K + 2*P_L + P_L + 2) >> 2);
+  zig[6] = zig[7] = zig[8] = zig[9] = (imgpel) P_L;
+
+  for (y = 0; y < 4; ++y)
+  {
+    for (x = 0; x < 4; ++x)
+      currSlice->mb_pred[pl][joff + y][ioff + x] = zig[x + 2*y];
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ * makes and returns 4x4 horizontal down prediction mode
+ *
+ * \return
+ * DECODING_OK decoding of intraprediction mode was sucessfull \n
+ *
+ ***********************************************************************
+ */
+static inline int intra4x4_hor_down_pred(Macroblock *currMB, //!< current macroblock
+ ColorPlane pl, //!< current image plane
+ int ioff, //!< pixel offset X within MB
+ int joff) //!< pixel offset Y within MB
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ int i;
+ imgpel PredPel[13]; // array of predictor pels
+ imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+
+ PixelPos pix_a[4];
+ PixelPos pix_b, pix_d;
+
+ int block_available_up;
+ int block_available_left;
+ int block_available_up_left;
+
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +0 , &pix_a[0]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +1 , &pix_a[1]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +2 , &pix_a[2]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff -1 , joff +3 , &pix_a[3]);
+
+ p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff -1 , &pix_b);
+ p_Vid->getNeighbourLuma(currMB, ioff -1 , joff -1 , &pix_d);
+
+ if (p_Vid->active_pps->constrained_intra_pred_flag)
+ {
+ for (i=0, block_available_left=1; i<4;++i)
+ block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0;
+ block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+ block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0;
+ }
+ else
+ {
+ block_available_left = pix_a[0].available;
+ block_available_up = pix_b.available;
+ block_available_up_left = pix_d.available;
+ }
+
+ if ((!block_available_up)||(!block_available_left)||(!block_available_up_left))
+ printf ("warning: Intra_4x4_Horizontal_Down prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+
+ // form predictor pels
+ P_A = imgY[pix_b.pos_y][pix_b.pos_x + 0];
+ P_B = imgY[pix_b.pos_y][pix_b.pos_x + 1];
+ P_C = imgY[pix_b.pos_y][pix_b.pos_x + 2];
+ P_D = imgY[pix_b.pos_y][pix_b.pos_x + 3];
+
+ P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+ P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+ P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+ P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+
+ P_X = imgY[pix_d.pos_y][pix_d.pos_x];
+
+ currSlice->mb_pred[pl][joff+0][ioff+0] =
+ currSlice->mb_pred[pl][joff+1][ioff+2] = (imgpel) ((P_X + P_I + 1) >> 1);
+ currSlice->mb_pred[pl][joff+0][ioff+1] =
+ currSlice->mb_pred[pl][joff+1][ioff+3] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2);
+ currSlice->mb_pred[pl][joff+0][ioff+2] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2);
+ currSlice->mb_pred[pl][joff+0][ioff+3] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+ currSlice->mb_pred[pl][joff+1][ioff+0] =
+ currSlice->mb_pred[pl][joff+2][ioff+2] = (imgpel) ((P_I + P_J + 1) >> 1);
+ currSlice->mb_pred[pl][joff+1][ioff+1] =
+ currSlice->mb_pred[pl][joff+2][ioff+3] = (imgpel) ((P_X + 2*P_I + P_J + 2) >> 2);
+ currSlice->mb_pred[pl][joff+2][ioff+0] =
+ currSlice->mb_pred[pl][joff+3][ioff+2] = (imgpel) ((P_J + P_K + 1) >> 1);
+ currSlice->mb_pred[pl][joff+2][ioff+1] =
+ currSlice->mb_pred[pl][joff+3][ioff+3] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2);
+ currSlice->mb_pred[pl][joff+3][ioff+0] = (imgpel) ((P_K + P_L + 1) >> 1);
+ currSlice->mb_pred[pl][joff+3][ioff+1] = (imgpel) ((P_J + 2*P_K + P_L + 2) >> 2);
+
+ return DECODING_OK;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Dispatches to the Intra 4x4 predictor selected by
+ *    p_Vid->ipredmode[img_block_y][img_block_x] and records that mode in
+ *    currMB->ipmode_DPCM (used for residual DPCM).
+ *
+ * \return
+ *    the status returned by the selected predictor \n
+ *    SEARCH_SYNC if the stored mode is not a legal Intra 4x4 mode; the
+ *    error is also reported on stdout
+ ***********************************************************************
+ */
+int intrapred(Macroblock *currMB,    //!< current macroblock
+              ColorPlane pl,         //!< current image plane
+              int ioff,              //!< pixel offset X within MB
+              int joff,              //!< pixel offset Y within MB
+              int img_block_x,       //!< location of block X, multiples of 4
+              int img_block_y)       //!< location of block Y, multiples of 4
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  byte mode = p_Vid->ipredmode[img_block_y][img_block_x];
+  int status;
+
+  currMB->ipmode_DPCM = mode;  // for residual DPCM
+
+  switch (mode)
+  {
+  case DC_PRED:
+    status = intra4x4_dc_pred(currMB, pl, ioff, joff);
+    break;
+  case VERT_PRED:
+    status = intra4x4_vert_pred(currMB, pl, ioff, joff);
+    break;
+  case HOR_PRED:
+    status = intra4x4_hor_pred(currMB, pl, ioff, joff);
+    break;
+  case DIAG_DOWN_RIGHT_PRED:
+    status = intra4x4_diag_down_right_pred(currMB, pl, ioff, joff);
+    break;
+  case DIAG_DOWN_LEFT_PRED:
+    status = intra4x4_diag_down_left_pred(currMB, pl, ioff, joff);
+    break;
+  case VERT_RIGHT_PRED:
+    status = intra4x4_vert_right_pred(currMB, pl, ioff, joff);
+    break;
+  case VERT_LEFT_PRED:
+    status = intra4x4_vert_left_pred(currMB, pl, ioff, joff);
+    break;
+  case HOR_UP_PRED:
+    status = intra4x4_hor_up_pred(currMB, pl, ioff, joff);
+    break;
+  case HOR_DOWN_PRED:
+    status = intra4x4_hor_down_pred(currMB, pl, ioff, joff);
+    break;
+  default:
+    printf("Error: illegal intra_4x4 prediction mode: %d\n", (int) mode);
+    status = SEARCH_SYNC;
+    break;
+  }
+
+  return status;
+}
diff --git a/Src/h264dec/ldecod/src/intra8x8_pred.c b/Src/h264dec/ldecod/src/intra8x8_pred.c
new file mode 100644
index 00000000..217e0ea8
--- /dev/null
+++ b/Src/h264dec/ldecod/src/intra8x8_pred.c
@@ -0,0 +1,1928 @@
+/*!
+ *************************************************************************************
+ * \file intra8x8_pred.c
+ *
+ * \brief
+ * Functions for intra 8x8 prediction
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright,
+ * address and affiliation details)
+ * - Yuri Vatis
+ * - Jan Muenster
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *
+ *************************************************************************************
+ */
+#include "global.h"
+#include "intra8x8_pred.h"
+#include "mb_access.h"
+#include "image.h"
+
+// Notation for comments regarding prediction and predictors.
+// The pels of the 8x8 block are labeled a1..h8. The predictor pels are
+// numbered by their index in PredPel[]: 0 is the pel above left, 1..16 are
+// the pels above and above right, and 17..24 are the pels to the left:
+//
+// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
+// 17 a1 b1 c1 d1 e1 f1 g1 h1
+// 18 a2 b2 c2 d2 e2 f2 g2 h2
+// 19 a3 b3 c3 d3 e3 f3 g3 h3
+// 20 a4 b4 c4 d4 e4 f4 g4 h4
+// 21 a5 b5 c5 d5 e5 f5 g5 h5
+// 22 a6 b6 c6 d6 e6 f6 g6 h6
+// 23 a7 b7 c7 d7 e7 f7 g7 h7
+// 24 a8 b8 c8 d8 e8 f8 g8 h8
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Fills an 8x8 area of the prediction buffer with a single value.
+ *
+ * \param mb_pred   first of the consecutive prediction rows to write
+ * \param offset_x  column at which the run of pels starts in every row
+ * \param pred      value stored in every pel of the area
+ *
+ *    NOTE(review): the _M_IX86 path writes 8 rows of 8 bytes, so it presumably
+ *    relies on imgpel being one byte and BLOCK_SIZE_8x8 being 8 -- confirm.
+ *    NOTE(review): no _mm_empty() is issued here; confirm that the MMX state is
+ *    cleared elsewhere before any x87 floating-point code runs.
+ *************************************************************************************
+ */
+static void memset_8x8(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, int pred)
+{
+#ifdef _M_IX86
+ // hand-unrolled MMX stores; the original author found the compiler's output for the plain loop below too slow
+ __m64 mmx_pred = _mm_set1_pi8(pred); // replicate the low byte of pred into all 8 bytes
+ mb_pred = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x]; // rebase so that mb_pred[k] addresses column offset_x of row k
+ *(__m64 *)mb_pred[0] = mmx_pred;
+ *(__m64 *)mb_pred[1] = mmx_pred;
+ *(__m64 *)mb_pred[2] = mmx_pred;
+ *(__m64 *)mb_pred[3] = mmx_pred;
+ *(__m64 *)mb_pred[4] = mmx_pred;
+ *(__m64 *)mb_pred[5] = mmx_pred;
+ *(__m64 *)mb_pred[6] = mmx_pred;
+ *(__m64 *)mb_pred[7] = mmx_pred;
+#else
+ int ii, jj; // ii: column within the area, jj: row within the area
+ for (jj = 0; jj < BLOCK_SIZE_8x8; jj++)
+ {
+ for (ii = 0; ii < BLOCK_SIZE_8x8; ii++)
+ {
+ mb_pred[jj][offset_x+ii]=(imgpel) pred;
+ }
+ }
+#endif
+}
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copies the same 8-pel row into each row of an 8x8 area of the prediction
+ *    buffer.
+ *
+ * \param mb_pred   first of the consecutive prediction rows to write
+ * \param offset_x  column at which the run of pels starts in every row
+ * \param row       the 8 pels replicated into every row
+ *
+ *    NOTE(review): both paths copy exactly 8 bytes per row, so they presumably
+ *    rely on imgpel being one byte -- confirm.
+ *    NOTE(review): no _mm_empty() is issued here; confirm that the MMX state is
+ *    cleared elsewhere before any x87 floating-point code runs.
+ *************************************************************************************
+ */
+static void memset_8x8_row(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, const imgpel row[8])
+{
+#ifdef _M_IX86
+ // hand-unrolled MMX stores; the original author found the compiler's output for the plain loop below too slow
+ __m64 mmx_pred = *(__m64 *)row; // load the 8 source bytes once
+ mb_pred = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x]; // rebase so that mb_pred[k] addresses column offset_x of row k
+ *(__m64 *)mb_pred[0] = mmx_pred;
+ *(__m64 *)mb_pred[1] = mmx_pred;
+ *(__m64 *)mb_pred[2] = mmx_pred;
+ *(__m64 *)mb_pred[3] = mmx_pred;
+ *(__m64 *)mb_pred[4] = mmx_pred;
+ *(__m64 *)mb_pred[5] = mmx_pred;
+ *(__m64 *)mb_pred[6] = mmx_pred;
+ *(__m64 *)mb_pred[7] = mmx_pred;
+#else
+ int jj; // row within the area
+ for (jj = 0; jj < BLOCK_SIZE_8x8; jj++)
+ {
+ memcpy(&mb_pred[jj][offset_x], row, 8); // byte count is 8, not 8 * sizeof(imgpel)
+ }
+#endif
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Prefiltering for Intra8x8 prediction.
+ *
+ *    Applies a (1,2,1)/4 low-pass filter in place to the 25 predictor pels in
+ *    PredPel: [0] is the above-left pel, [1..16] the row above and [17..24] the
+ *    column to the left. All taps are taken from an unfiltered snapshot. At the
+ *    ends of a run the missing neighbour is replaced by the end pel itself.
+ *
+ * \param PredPel        predictor pels, filtered in place (25 entries)
+ * \param block_up_left  nonzero if the above-left pel may be used
+ * \param block_up       nonzero if the row above may be used
+ * \param block_left     nonzero if the left column may be used
+ *************************************************************************************
+ */
+static __forceinline void LowPassForIntra8x8Pred(imgpel *PredPel, int block_up_left, int block_up, int block_left)
+{
+  imgpel src[25];  // unfiltered copy of the predictor pels
+  int k;
+
+  memcpy(src, PredPel, sizeof(src));
+
+  // above-left pel: stays untouched when neither the row above nor the left column is usable
+  if (block_up_left)
+  {
+    if (block_up && block_left)
+      PredPel[0] = (imgpel) ((src[17] + (src[0] << 1) + src[1] + 2) >> 2);
+    else if (block_up)
+      PredPel[0] = (imgpel) ((src[0] + (src[0] << 1) + src[1] + 2) >> 2);
+    else if (block_left)
+      PredPel[0] = (imgpel) ((src[0] + (src[0] << 1) + src[17] + 2) >> 2);
+  }
+
+  // row above, including the above-right pels
+  if (block_up)
+  {
+    PredPel[1] = (imgpel) (((block_up_left ? src[0] : src[1]) + (src[1] << 1) + src[2] + 2) >> 2);
+
+    for (k = 2; k < 16; ++k)
+      PredPel[k] = (imgpel) ((src[k - 1] + (src[k] << 1) + src[k + 1] + 2) >> 2);
+
+    PredPel[16] = (imgpel) ((src[16] + (src[16] << 1) + src[15] + 2) >> 2);
+  }
+
+  // left column
+  if (block_left)
+  {
+    PredPel[17] = (imgpel) (((block_up_left ? src[0] : src[17]) + (src[17] << 1) + src[18] + 2) >> 2);
+
+    for (k = 18; k < 24; ++k)
+      PredPel[k] = (imgpel) ((src[k - 1] + (src[k] << 1) + src[k + 1] + 2) >> 2);
+
+    PredPel[24] = (imgpel) ((src[23] + (src[24] << 1) + src[24] + 2) >> 2);
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Prefiltering for Intra8x8 prediction (Horizontal).
+ *
+ *    Applies the (1,2,1)/4 low-pass filter in place to the above-left pel
+ *    PredPel[0] and to the row above PredPel[1..16] only; the left column
+ *    PredPel[17..24] is not modified. PredPel[17] is read as a tap for the
+ *    above-left pel when the left column is usable.
+ *
+ * \param PredPel        predictor pels, filtered in place
+ * \param block_up_left  nonzero if the above-left pel may be used
+ * \param block_up       nonzero if the row above may be used
+ * \param block_left     nonzero if the left column may be used
+ *************************************************************************************
+ */
+static __forceinline void LowPassForIntra8x8PredHor(imgpel *PredPel, int block_up_left, int block_up, int block_left)
+{
+  imgpel src[16];  // unfiltered copy of PredPel[0..15]
+  int k;
+
+  memcpy(src, PredPel, sizeof(src));
+
+  // above-left pel: stays untouched when neither the row above nor the left column is usable
+  if (block_up_left)
+  {
+    if (block_up && block_left)
+      PredPel[0] = (imgpel) ((PredPel[17] + (src[0] << 1) + src[1] + 2) >> 2);
+    else if (block_up)
+      PredPel[0] = (imgpel) ((src[0] + (src[0] << 1) + src[1] + 2) >> 2);
+    else if (block_left)
+      PredPel[0] = (imgpel) ((src[0] + (src[0] << 1) + PredPel[17] + 2) >> 2);
+  }
+
+  // row above, including the above-right pels
+  if (block_up)
+  {
+    PredPel[1] = (imgpel) (((block_up_left ? src[0] : src[1]) + (src[1] << 1) + src[2] + 2) >> 2);
+
+    for (k = 2; k < 15; ++k)
+      PredPel[k] = (imgpel) ((src[k - 1] + (src[k] << 1) + src[k + 1] + 2) >> 2);
+
+    // entry 16 is not in the snapshot: it is read from PredPel before being overwritten
+    PredPel[15] = (imgpel) ((src[14] + (src[15] << 1) + PredPel[16] + 2) >> 2);
+    PredPel[16] = (imgpel) ((PredPel[16] + (PredPel[16] << 1) + src[15] + 2) >> 2);
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Prefiltering for Intra8x8 prediction (Vertical).
+ *
+ *    Applies the (1,2,1)/4 low-pass filter in place to the above-left pel
+ *    PredPel[0] and to the left column PredPel[17..24] only; the row above
+ *    PredPel[1..16] is not modified. PredPel[1] is read as a tap for the
+ *    above-left pel when the row above is usable.
+ *
+ * \param PredPel        predictor pels, filtered in place
+ * \param block_up_left  nonzero if the above-left pel may be used
+ * \param block_up       nonzero if the row above may be used
+ * \param block_left     nonzero if the left column may be used
+ *************************************************************************************
+ */
+static __forceinline void LowPassForIntra8x8PredVer(imgpel *PredPel, int block_up_left, int block_up, int block_left)
+{
+  imgpel src[25];  // unfiltered copy; only entries 0, 1 and 17..24 are filled in and used
+  int k;
+
+  src[0] = PredPel[0];
+  src[1] = PredPel[1];
+  for (k = 17; k <= 24; ++k)
+    src[k] = PredPel[k];
+
+  // above-left pel: stays untouched when neither the row above nor the left column is usable
+  if (block_up_left)
+  {
+    if (block_up && block_left)
+      PredPel[0] = (imgpel) ((src[17] + (src[0] << 1) + src[1] + 2) >> 2);
+    else if (block_up)
+      PredPel[0] = (imgpel) ((src[0] + (src[0] << 1) + src[1] + 2) >> 2);
+    else if (block_left)
+      PredPel[0] = (imgpel) ((src[0] + (src[0] << 1) + src[17] + 2) >> 2);
+  }
+
+  // left column
+  if (block_left)
+  {
+    PredPel[17] = (imgpel) (((block_up_left ? src[0] : src[17]) + (src[17] << 1) + src[18] + 2) >> 2);
+
+    for (k = 18; k < 24; ++k)
+      PredPel[k] = (imgpel) ((src[k - 1] + (src[k] << 1) + src[k + 1] + 2) >> 2);
+
+    PredPel[24] = (imgpel) ((src[23] + (src[24] << 1) + src[24] + 2) >> 2);
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Builds the Intra 8x8 DC prediction for the block at (ioff, joff) and
+ *    stores it in currSlice->mb_pred[pl].
+ *
+ *    The 25 predictor pels are gathered (unusable ones are replaced by
+ *    p_Vid->dc_pred_value_comp[pl], or by the last pel above for the
+ *    above-right run), low-pass filtered, and then the rounded mean of
+ *    the usable pels above and/or to the left fills the whole 8x8 block.
+ *    If neither side is usable the block is filled with
+ *    p_Vid->dc_pred_value_comp[pl].
+ *
+ * \return
+ *    DECODING_OK (always)
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_dc_pred(Macroblock *currMB,    //!< current macroblock
+                                   ColorPlane pl,         //!< current image plane
+                                   int ioff,              //!< pixel offset X within MB
+                                   int joff)              //!< pixel offset Y within MB
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  imgpel **plane = (pl) ? dec_picture->imgUV[pl - 1]->img : dec_picture->imgY->img;
+  imgpel PredPel[25];  // [0] above left, [1..8] above, [9..16] above right, [17..24] left
+  imgpel *src_row;
+  PixelPos left[8];
+  PixelPos above, above_right, above_left;
+  int have_up, have_left, have_up_left, have_up_right;
+  int dc = 0;
+  int k;
+
+  // locate the neighbouring pels; the left-most column of the MB uses
+  // dedicated accessors
+  if (ioff == 0)
+  {
+    for (k = 0; k < 8; ++k)
+      p_Vid->getNeighbourNPLumaNB(currMB, joff + k, &left[k]);
+
+    p_Vid->getNeighbour0XLuma(currMB, joff - 1, &above);
+    p_Vid->getNeighbourPXLumaNB(currMB, 8, joff - 1, &above_right);
+    p_Vid->getNeighbourNXLuma(currMB, joff - 1, &above_left);
+  }
+  else
+  { // ioff == 8
+    for (k = 0; k < 8; ++k)
+      p_Vid->getNeighbourPPLumaNB(currMB, ioff - 1, joff + k, &left[k]);
+
+    p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &above);
+    p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &above_right);
+    p_Vid->getNeighbourPXLumaNB(currMB, ioff - 1, joff - 1, &above_left);
+  }
+
+  // the above-right neighbour is never used for the block at (8,8)
+  above_right.available = above_right.available && !(ioff == 8 && joff == 8);
+
+  if (p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    // with constrained intra prediction only intra-coded neighbours count
+    have_left = 1;
+    for (k = 0; k < 8; k++)
+      have_left &= left[k].available ? p_Vid->intra_block[left[k].mb_addr] : 0;
+    have_up       = above.available       ? p_Vid->intra_block[above.mb_addr]       : 0;
+    have_up_right = above_right.available ? p_Vid->intra_block[above_right.mb_addr] : 0;
+    have_up_left  = above_left.available  ? p_Vid->intra_block[above_left.mb_addr]  : 0;
+  }
+  else
+  {
+    have_left     = left[0].available;
+    have_up       = above.available;
+    have_up_right = above_right.available;
+    have_up_left  = above_left.available;
+  }
+
+  // pels above
+  if (have_up)
+  {
+    src_row = &plane[above.pos_y][above.pos_x];
+    for (k = 0; k < 8; ++k)
+      PredPel[1 + k] = src_row[k];
+  }
+  else
+  {
+    for (k = 1; k <= 8; ++k)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // pels above right; fall back to the last pel above
+  if (have_up_right)
+  {
+    src_row = &plane[above_right.pos_y][above_right.pos_x];
+    for (k = 0; k < 8; ++k)
+      PredPel[9 + k] = src_row[k];
+  }
+  else
+  {
+    for (k = 9; k <= 16; ++k)
+      PredPel[k] = PredPel[8];
+  }
+
+  // pels to the left; the column of left[0] is used for every row
+  if (have_left)
+  {
+    for (k = 0; k < 8; ++k)
+      PredPel[17 + k] = plane[left[k].pos_y][left[0].pos_x];
+  }
+  else
+  {
+    for (k = 17; k <= 24; ++k)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // pel above left
+  if (have_up_left)
+    PredPel[0] = plane[above_left.pos_y][above_left.pos_x];
+  else
+    PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+
+  LowPassForIntra8x8Pred(&(PredPel[0]), have_up_left, have_up, have_left);
+
+  if (have_up && have_left)
+  {
+    // no edge: mean of the 8 pels above and the 8 pels to the left
+    for (k = 0; k < 8; ++k)
+      dc += PredPel[1 + k] + PredPel[17 + k];
+    dc = (dc + 8) >> 4;
+  }
+  else if (have_left)
+  {
+    // upper edge: mean of the 8 pels to the left
+    for (k = 17; k <= 24; ++k)
+      dc += PredPel[k];
+    dc = (dc + 4) >> 3;
+  }
+  else if (have_up)
+  {
+    // left edge: mean of the 8 pels above
+    for (k = 1; k <= 8; ++k)
+      dc += PredPel[k];
+    dc = (dc + 4) >> 3;
+  }
+  else
+  {
+    // top left corner, nothing to predict from
+    dc = p_Vid->dc_pred_value_comp[pl];
+  }
+
+  memset_8x8(&currSlice->mb_pred[pl][joff], ioff, dc);
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Intra 8x8 vertical prediction for the block at (ioff, joff) of
+ *    plane pl. Gathers the reference samples above the block, passes
+ *    them through LowPassForIntra8x8PredHor (defined elsewhere) and hands
+ *    PredPel[1..8] to memset_8x8_row (defined elsewhere; presumably it
+ *    replicates that row over the 8x8 block of currSlice->mb_pred[pl]).
+ *
+ *    PredPel layout used here: [0] up-left sample, [1..8] samples above,
+ *    [9..16] samples above-right. [17..24] are not loaded by this function.
+ *
+ * \return
+ *    DECODING_OK in all cases. A warning is printed when the block
+ *    above is not available. \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_vert_pred(Macroblock *currMB,    //!< current macroblock
+                                     ColorPlane pl,         //!< current image plane
+                                     int ioff,              //!< pixel offset X within MB
+                                     int joff)              //!< pixel offset Y within MB
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+
+  // plane 0 reads from imgY, the other planes from imgUV[pl - 1]
+  imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel PredPel[25];  // array of predictor pels
+  imgpel *above;
+
+  PixelPos pix_a[8];             // left neighbours, one per row
+  PixelPos pix_b, pix_c, pix_d;  // above, above-right, up-left neighbours
+
+  int block_available_left;
+  int block_available_up;
+  int block_available_up_right;
+  int block_available_up_left;
+  int k;
+
+  // only availability is needed for the left column, hence the _NoPos variant
+  for (k = 0; k < 8; k++)
+    p_Vid->getNeighbourXPLumaNB_NoPos(currMB, ioff - 1, joff + k, &pix_a[k]);
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &pix_b);
+  p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+  p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+
+  // the above-right neighbour is treated as unavailable for the block at offset (8, 8)
+  pix_c.available = pix_c.available && (ioff != 8 || joff != 8);
+
+  if (!p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    block_available_left     = pix_a[0].available;
+    block_available_up       = pix_b.available;
+    block_available_up_right = pix_c.available;
+    block_available_up_left  = pix_d.available;
+  }
+  else
+  {
+    // constrained intra prediction: a neighbour only counts when intra_block[] is set for its macroblock
+    block_available_left = 1;
+    for (k = 0; k < 8; k++)
+      block_available_left &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+    block_available_up       = pix_b.available ? p_Vid->intra_block[pix_b.mb_addr] : 0;
+    block_available_up_right = pix_c.available ? p_Vid->intra_block[pix_c.mb_addr] : 0;
+    block_available_up_left  = pix_d.available ? p_Vid->intra_block[pix_d.mb_addr] : 0;
+  }
+
+  // samples above the block -> PredPel[1..8]
+  if (block_available_up)
+  {
+    above = &imgY[pix_b.pos_y][pix_b.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[1 + k] = above[k];
+  }
+  else
+  {
+    printf ("warning: Intra_8x8_Vertical prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+    for (k = 1; k <= 8; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // samples above-right -> PredPel[9..16]; fall back to repeating PredPel[8]
+  if (block_available_up_right)
+  {
+    above = &imgY[pix_c.pos_y][pix_c.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[9 + k] = above[k];
+  }
+  else
+  {
+    for (k = 9; k <= 16; k++)
+      PredPel[k] = PredPel[8];
+  }
+
+  // up-left corner sample -> PredPel[0]
+  if (block_available_up_left)
+    PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+  else
+    PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+
+  LowPassForIntra8x8PredHor(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+
+  memset_8x8_row(&currSlice->mb_pred[pl][joff], ioff, &PredPel[1]);
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Intra 8x8 horizontal prediction for the block at (ioff, joff) of
+ *    plane pl. Gathers the eight reference samples left of the block,
+ *    passes them through LowPassForIntra8x8PredVer (defined elsewhere)
+ *    and writes PredPel[17 + row] into all eight columns of each row of
+ *    currSlice->mb_pred[pl].
+ *
+ *    PredPel layout used here: [0] up-left sample, [17..24] left column
+ *    from top to bottom. [1..16] are not loaded by this function.
+ *
+ * \return
+ *    DECODING_OK in all cases. A warning is printed when the block to
+ *    the left is not available. \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_hor_pred(Macroblock *currMB,    //!< current macroblock
+                                    ColorPlane pl,         //!< current image plane
+                                    int ioff,              //!< pixel offset X within MB
+                                    int joff)              //!< pixel offset Y within MB
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+
+  // plane 0 reads from imgY, the other planes from imgUV[pl - 1]
+  imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel PredPel[25];  // array of predictor pels
+
+  PixelPos pix_a[8];             // left neighbours, one per row
+  PixelPos pix_b, pix_c, pix_d;  // above, above-right, up-left neighbours
+
+  int block_available_left;
+  int block_available_up;
+  int block_available_up_right;
+  int block_available_up_left;
+  int k, row, col;
+
+  for (k = 0; k < 8; k++)
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + k, &pix_a[k]);
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &pix_b);
+  p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+  p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+
+  // the above-right neighbour is treated as unavailable for the block at offset (8, 8)
+  pix_c.available = pix_c.available && (ioff != 8 || joff != 8);
+
+  if (!p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    block_available_left     = pix_a[0].available;
+    block_available_up       = pix_b.available;
+    block_available_up_right = pix_c.available;
+    block_available_up_left  = pix_d.available;
+  }
+  else
+  {
+    // constrained intra prediction: a neighbour only counts when intra_block[] is set for its macroblock
+    block_available_left = 1;
+    for (k = 0; k < 8; k++)
+      block_available_left &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+    block_available_up       = pix_b.available ? p_Vid->intra_block[pix_b.mb_addr] : 0;
+    block_available_up_right = pix_c.available ? p_Vid->intra_block[pix_c.mb_addr] : 0;
+    block_available_up_left  = pix_d.available ? p_Vid->intra_block[pix_d.mb_addr] : 0;
+  }
+
+  // left column -> PredPel[17..24]
+  if (block_available_left)
+  {
+    for (k = 0; k < 8; k++)
+      PredPel[17 + k] = imgY[pix_a[k].pos_y][pix_a[k].pos_x];
+  }
+  else
+  {
+    printf ("warning: Intra_8x8_Horizontal prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+    for (k = 17; k <= 24; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // up-left corner sample -> PredPel[0]
+  if (block_available_up_left)
+    PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+  else
+    PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+
+  LowPassForIntra8x8PredVer(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+
+  // every row of the block is a flat run of its left reference sample
+  for (row = 0; row < BLOCK_SIZE_8x8; row++)
+  {
+    for (col = 0; col < 8; col++)
+      currSlice->mb_pred[pl][joff + row][ioff + col] = (imgpel) PredPel[17 + row];
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Intra 8x8 diagonal down-right prediction for the block at
+ *    (ioff, joff) of plane pl. Every sample on a down-right diagonal
+ *    (constant x - y) receives the same (a + 2*b + c + 2) >> 2 value
+ *    taken from three consecutive reference samples along the left /
+ *    up-left / top border.
+ *
+ *    PredPel layout: [0] up-left sample, [1..8] samples above,
+ *    [9..16] samples above-right, [17..24] left column top to bottom.
+ *
+ * \return
+ *    DECODING_OK in all cases. A warning is printed when the upper,
+ *    left or up-left neighbour is not available. \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_diag_down_right_pred(Macroblock *currMB,    //!< current macroblock
+                                                ColorPlane pl,         //!< current image plane
+                                                int ioff,              //!< pixel offset X within MB
+                                                int joff)              //!< pixel offset Y within MB
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+
+  // plane 0 reads from imgY, the other planes from imgUV[pl - 1]
+  imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel PredPel[25];  // array of predictor pels
+  imgpel edge[17];     // border samples in one line: left column bottom-to-top, corner, top row
+  imgpel *above;
+  h264_imgpel_macroblock_row_t *pred;
+
+  PixelPos pix_a[8];             // left neighbours, one per row
+  PixelPos pix_b, pix_c, pix_d;  // above, above-right, up-left neighbours
+
+  int block_available_left;
+  int block_available_up;
+  int block_available_up_right;
+  int block_available_up_left;
+  int k, row, col, centre;
+
+  for (k = 0; k < 8; k++)
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + k, &pix_a[k]);
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &pix_b);
+  p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+  p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+
+  // the above-right neighbour is treated as unavailable for the block at offset (8, 8)
+  pix_c.available = pix_c.available && (ioff != 8 || joff != 8);
+
+  if (!p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    block_available_left     = pix_a[0].available;
+    block_available_up       = pix_b.available;
+    block_available_up_right = pix_c.available;
+    block_available_up_left  = pix_d.available;
+  }
+  else
+  {
+    // constrained intra prediction: a neighbour only counts when intra_block[] is set for its macroblock
+    block_available_left = 1;
+    for (k = 0; k < 8; k++)
+      block_available_left &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+    block_available_up       = pix_b.available ? p_Vid->intra_block[pix_b.mb_addr] : 0;
+    block_available_up_right = pix_c.available ? p_Vid->intra_block[pix_c.mb_addr] : 0;
+    block_available_up_left  = pix_d.available ? p_Vid->intra_block[pix_d.mb_addr] : 0;
+  }
+
+  if (!(block_available_up && block_available_left && block_available_up_left))
+    printf ("warning: Intra_8x8_Diagonal_Down_Right prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+
+  // samples above the block -> PredPel[1..8]
+  if (block_available_up)
+  {
+    above = &imgY[pix_b.pos_y][pix_b.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[1 + k] = above[k];
+  }
+  else
+  {
+    for (k = 1; k <= 8; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // samples above-right -> PredPel[9..16]; fall back to repeating PredPel[8]
+  if (block_available_up_right)
+  {
+    above = &imgY[pix_c.pos_y][pix_c.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[9 + k] = above[k];
+  }
+  else
+  {
+    for (k = 9; k <= 16; k++)
+      PredPel[k] = PredPel[8];
+  }
+
+  // left column -> PredPel[17..24]
+  if (block_available_left)
+  {
+    for (k = 0; k < 8; k++)
+      PredPel[17 + k] = imgY[pix_a[k].pos_y][pix_a[k].pos_x];
+  }
+  else
+  {
+    for (k = 17; k <= 24; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // up-left corner sample -> PredPel[0]
+  if (block_available_up_left)
+    PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+  else
+    PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+
+  LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+
+  // Mode DIAG_DOWN_RIGHT_PRED
+  // Unfold the border so that walking edge[] goes from the bottom-left
+  // sample up the left column, through the corner, then along the top row:
+  //   edge[0..7] = PredPel[24..17], edge[8] = PredPel[0], edge[9..16] = PredPel[1..8]
+  for (k = 0; k < 8; k++)
+  {
+    edge[7 - k] = PredPel[17 + k];
+    edge[9 + k] = PredPel[1 + k];
+  }
+  edge[8] = PredPel[0];
+
+  pred = &currSlice->mb_pred[pl][joff];
+  for (row = 0; row < 8; row++)
+  {
+    for (col = 0; col < 8; col++)
+    {
+      // the diagonal through (col, row) meets the unfolded border at 8 + col - row (range 1..15)
+      centre = 8 + col - row;
+      pred[row][ioff + col] = (imgpel) ((edge[centre - 1] + edge[centre + 1] + 2*(edge[centre]) + 2) >> 2);
+    }
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Intra 8x8 diagonal down-left prediction for the block at
+ *    (ioff, joff) of plane pl. Every sample on an anti-diagonal
+ *    (constant x + y) receives the same 3-tap value computed from the
+ *    reference samples above and above-right of the block.
+ *
+ *    PredPel layout: [0] up-left sample, [1..8] samples above,
+ *    [9..16] samples above-right, [17..24] left column top to bottom.
+ *
+ * \return
+ *    DECODING_OK in all cases. A warning is printed when the block
+ *    above is not available. \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_diag_down_left_pred(Macroblock *currMB,    //!< current macroblock
+                                               ColorPlane pl,         //!< current image plane
+                                               int ioff,              //!< pixel offset X within MB
+                                               int joff)              //!< pixel offset Y within MB
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+
+  // plane 0 reads from imgY, the other planes from imgUV[pl - 1]
+  imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel PredPel[25];  // array of predictor pels
+  imgpel *above;
+  h264_imgpel_macroblock_row_t *pred;
+
+  PixelPos pix_a[8];             // left neighbours, one per row
+  PixelPos pix_b, pix_c, pix_d;  // above, above-right, up-left neighbours
+
+  int block_available_left;
+  int block_available_up;
+  int block_available_up_right;
+  int block_available_up_left;
+  int k, row, col, diag;
+
+  for (k = 0; k < 8; k++)
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + k, &pix_a[k]);
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &pix_b);
+  p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+  p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+
+  // the above-right neighbour is treated as unavailable for the block at offset (8, 8)
+  pix_c.available = pix_c.available && (ioff != 8 || joff != 8);
+
+  if (!p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    block_available_left     = pix_a[0].available;
+    block_available_up       = pix_b.available;
+    block_available_up_right = pix_c.available;
+    block_available_up_left  = pix_d.available;
+  }
+  else
+  {
+    // constrained intra prediction: a neighbour only counts when intra_block[] is set for its macroblock
+    block_available_left = 1;
+    for (k = 0; k < 8; k++)
+      block_available_left &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+    block_available_up       = pix_b.available ? p_Vid->intra_block[pix_b.mb_addr] : 0;
+    block_available_up_right = pix_c.available ? p_Vid->intra_block[pix_c.mb_addr] : 0;
+    block_available_up_left  = pix_d.available ? p_Vid->intra_block[pix_d.mb_addr] : 0;
+  }
+
+  // samples above the block -> PredPel[1..8]
+  if (block_available_up)
+  {
+    above = &imgY[pix_b.pos_y][pix_b.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[1 + k] = above[k];
+  }
+  else
+  {
+    printf ("warning: Intra_8x8_Diagonal_Down_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+    for (k = 1; k <= 8; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // samples above-right -> PredPel[9..16]; fall back to repeating PredPel[8]
+  if (block_available_up_right)
+  {
+    above = &imgY[pix_c.pos_y][pix_c.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[9 + k] = above[k];
+  }
+  else
+  {
+    for (k = 9; k <= 16; k++)
+      PredPel[k] = PredPel[8];
+  }
+
+  // left column -> PredPel[17..24]
+  if (block_available_left)
+  {
+    for (k = 0; k < 8; k++)
+      PredPel[17 + k] = imgY[pix_a[k].pos_y][pix_a[k].pos_x];
+  }
+  else
+  {
+    for (k = 17; k <= 24; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // up-left corner sample -> PredPel[0]
+  if (block_available_up_left)
+    PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+  else
+    PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+
+  LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+
+  // Mode DIAG_DOWN_LEFT_PRED
+  pred = &currSlice->mb_pred[pl][joff];
+  for (row = 0; row < 8; row++)
+  {
+    for (col = 0; col < 8; col++)
+    {
+      diag = row + col;  // anti-diagonal index, 0..14
+      if (diag < 14)
+      {
+        // 3-tap value centred on PredPel[diag + 2]
+        pred[row][ioff + col] = (imgpel) ((PredPel[diag + 1] + PredPel[diag + 3] + 2*(PredPel[diag + 2]) + 2) >> 2);
+      }
+      else
+      {
+        // bottom-right sample: there is no reference beyond PredPel[16], so it is weighted three times
+        pred[row][ioff + col] = (imgpel) ((PredPel[15] + 3*(PredPel[16]) + 2) >> 2);
+      }
+    }
+  }
+
+  return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Intra 8x8 vertical-right prediction for the block at (ioff, joff)
+ *    of plane pl. With zVR = 2*x - y, samples with even zVR >= 0 get a
+ *    2-tap average of two top references, samples with odd zVR >= 1 get
+ *    a 3-tap value from the top references, and samples with zVR < 0 get
+ *    a 3-tap value taken around the corner / down the left column.
+ *
+ *    PredPel layout: [0] up-left sample, [1..8] samples above,
+ *    [9..16] samples above-right, [17..24] left column top to bottom.
+ *
+ * \return
+ *    DECODING_OK in all cases. A warning is printed when the upper,
+ *    left or up-left neighbour is not available. \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_vert_right_pred(Macroblock *currMB,    //!< current macroblock
+                                           ColorPlane pl,         //!< current image plane
+                                           int ioff,              //!< pixel offset X within MB
+                                           int joff)              //!< pixel offset Y within MB
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+
+  // plane 0 reads from imgY, the other planes from imgUV[pl - 1]
+  imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
+  imgpel PredPel[25];  // array of predictor pels
+  imgpel edge[17];     // border samples in one line: left column bottom-to-top, corner, top row
+  imgpel *above;
+  h264_imgpel_macroblock_row_t *pred;
+
+  PixelPos pix_a[8];             // left neighbours, one per row
+  PixelPos pix_b, pix_c, pix_d;  // above, above-right, up-left neighbours
+
+  int block_available_left;
+  int block_available_up;
+  int block_available_up_right;
+  int block_available_up_left;
+  int k, row, col, zVR, centre;
+
+  for (k = 0; k < 8; k++)
+    p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + k, &pix_a[k]);
+
+  p_Vid->getNeighbourPXLumaNB(currMB, ioff, joff - 1, &pix_b);
+  p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+  p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+
+  // the above-right neighbour is treated as unavailable for the block at offset (8, 8)
+  pix_c.available = pix_c.available && (ioff != 8 || joff != 8);
+
+  if (!p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    block_available_left     = pix_a[0].available;
+    block_available_up       = pix_b.available;
+    block_available_up_right = pix_c.available;
+    block_available_up_left  = pix_d.available;
+  }
+  else
+  {
+    // constrained intra prediction: a neighbour only counts when intra_block[] is set for its macroblock
+    block_available_left = 1;
+    for (k = 0; k < 8; k++)
+      block_available_left &= pix_a[k].available ? p_Vid->intra_block[pix_a[k].mb_addr] : 0;
+    block_available_up       = pix_b.available ? p_Vid->intra_block[pix_b.mb_addr] : 0;
+    block_available_up_right = pix_c.available ? p_Vid->intra_block[pix_c.mb_addr] : 0;
+    block_available_up_left  = pix_d.available ? p_Vid->intra_block[pix_d.mb_addr] : 0;
+  }
+
+  if (!(block_available_up && block_available_left && block_available_up_left))
+    printf ("warning: Intra_8x8_Vertical_Right prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+
+  // samples above the block -> PredPel[1..8]
+  if (block_available_up)
+  {
+    above = &imgY[pix_b.pos_y][pix_b.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[1 + k] = above[k];
+  }
+  else
+  {
+    for (k = 1; k <= 8; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // samples above-right -> PredPel[9..16]; fall back to repeating PredPel[8]
+  if (block_available_up_right)
+  {
+    above = &imgY[pix_c.pos_y][pix_c.pos_x];
+    for (k = 0; k < 8; k++)
+      PredPel[9 + k] = above[k];
+  }
+  else
+  {
+    for (k = 9; k <= 16; k++)
+      PredPel[k] = PredPel[8];
+  }
+
+  // left column -> PredPel[17..24]
+  if (block_available_left)
+  {
+    for (k = 0; k < 8; k++)
+      PredPel[17 + k] = imgY[pix_a[k].pos_y][pix_a[k].pos_x];
+  }
+  else
+  {
+    for (k = 17; k <= 24; k++)
+      PredPel[k] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+  }
+
+  // up-left corner sample -> PredPel[0]
+  if (block_available_up_left)
+    PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+  else
+    PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+
+  LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+
+  // Unfold the border so that walking edge[] goes from the bottom-left
+  // sample up the left column, through the corner, then along the top row:
+  //   edge[0..7] = PredPel[24..17], edge[8] = PredPel[0], edge[9..16] = PredPel[1..8]
+  for (k = 0; k < 8; k++)
+  {
+    edge[7 - k] = PredPel[17 + k];
+    edge[9 + k] = PredPel[1 + k];
+  }
+  edge[8] = PredPel[0];
+
+  pred = &currSlice->mb_pred[pl][joff];
+  for (row = 0; row < 8; row++)
+  {
+    for (col = 0; col < 8; col++)
+    {
+      zVR = 2 * col - row;
+      if (zVR < 0)
+      {
+        // projection lands on the corner (zVR == -1) or on the left column: 3-tap centred at edge[9 + zVR]
+        centre = 9 + zVR;
+        pred[row][ioff + col] = (imgpel) ((edge[centre - 1] + edge[centre + 1] + 2*edge[centre] + 2) >> 2);
+      }
+      else
+      {
+        // projection lands on the top row at horizontal offset col - (row >> 1)
+        centre = 8 + col - (row >> 1);
+        if (zVR & 1)
+          pred[row][ioff + col] = (imgpel) ((edge[centre - 1] + edge[centre + 1] + 2*edge[centre] + 2) >> 2);
+        else
+          pred[row][ioff + col] = (imgpel) ((edge[centre] + edge[centre + 1] + 1) >> 1);
+      }
+    }
+  }
+
+  return DECODING_OK;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ * makes and returns 8x8 vertical left prediction mode
+ *
+ * \return
+ * DECODING_OK decoding of intraprediction mode was successful \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_vert_left_pred(Macroblock *currMB, //!< current macroblock
+ ColorPlane pl, //!< current image plane
+ int ioff, //!< pixel offset X within MB
+ int joff) //!< pixel offset Y within MB
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ int i;
+ imgpel PredPel[25]; // array of predictor pels
+ imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY
+
+ PixelPos pix_a[8];
+ PixelPos pix_b, pix_c, pix_d;
+h264_imgpel_macroblock_row_t *pred;
+ int block_available_up;
+ int block_available_left;
+ int block_available_up_left;
+ int block_available_up_right;
+
+ imgpel *pred_pels;
+
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]);
+
+ p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b);
+ p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+ p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+
+ pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8);
+
+ if (p_Vid->active_pps->constrained_intra_pred_flag)
+ {
+ for (i=0, block_available_left=1; i<8;i++)
+ block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0;
+ block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+ block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0;
+ block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0;
+ }
+ else
+ {
+ block_available_left = pix_a[0].available;
+ block_available_up = pix_b.available;
+ block_available_up_right = pix_c.available;
+ block_available_up_left = pix_d.available;
+ }
+
+ if (!block_available_up)
+ printf ("warning: Intra_4x4_Vertical_Left prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+
+ // form predictor pels
+ if (block_available_up)
+ {
+ pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x];
+ PredPel[1] = pred_pels[0];
+ PredPel[2] = pred_pels[1];
+ PredPel[3] = pred_pels[2];
+ PredPel[4] = pred_pels[3];
+ PredPel[5] = pred_pels[4];
+ PredPel[6] = pred_pels[5];
+ PredPel[7] = pred_pels[6];
+ PredPel[8] = pred_pels[7];
+ }
+ else
+ {
+ PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ if (block_available_up_right)
+ {
+ pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x];
+ PredPel[9] = pred_pels[0];
+ PredPel[10] = pred_pels[1];
+ PredPel[11] = pred_pels[2];
+ PredPel[12] = pred_pels[3];
+ PredPel[13] = pred_pels[4];
+ PredPel[14] = pred_pels[5];
+ PredPel[15] = pred_pels[6];
+ PredPel[16] = pred_pels[7];
+
+ }
+ else
+ {
+ PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8];
+ }
+
+ if (block_available_left)
+ {
+ PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+ PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+ PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+ PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+ PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x];
+ PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x];
+ PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x];
+ PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x];
+ }
+ else
+ {
+ PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ if (block_available_up_left)
+ {
+ PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+ }
+ else
+ {
+ PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+pred = &currSlice->mb_pred[pl][joff];
+ pred[0+0][ioff+0] = (imgpel) ((PredPel[1] + PredPel[2] + 1) >> 1);
+ pred[0+0][ioff+1] =
+ pred[0+2][ioff+0] = (imgpel) ((PredPel[2] + PredPel[3] + 1) >> 1);
+ pred[0+0][ioff+2] =
+ pred[0+2][ioff+1] =
+ pred[0+4][ioff+0] = (imgpel) ((PredPel[3] + PredPel[4] + 1) >> 1);
+ pred[0+0][ioff+3] =
+ pred[0+2][ioff+2] =
+ pred[0+4][ioff+1] =
+ pred[0+6][ioff+0] = (imgpel) ((PredPel[4] + PredPel[5] + 1) >> 1);
+ pred[0+0][ioff+4] =
+ pred[0+2][ioff+3] =
+ pred[0+4][ioff+2] =
+ pred[0+6][ioff+1] = (imgpel) ((PredPel[5] + PredPel[6] + 1) >> 1);
+ pred[0+0][ioff+5] =
+ pred[0+2][ioff+4] =
+ pred[0+4][ioff+3] =
+ pred[0+6][ioff+2] = (imgpel) ((PredPel[6] + PredPel[7] + 1) >> 1);
+ pred[0+0][ioff+6] =
+ pred[0+2][ioff+5] =
+ pred[0+4][ioff+4] =
+ pred[0+6][ioff+3] = (imgpel) ((PredPel[7] + PredPel[8] + 1) >> 1);
+ pred[0+0][ioff+7] =
+ pred[0+2][ioff+6] =
+ pred[0+4][ioff+5] =
+ pred[0+6][ioff+4] = (imgpel) ((PredPel[8] + PredPel[9] + 1) >> 1);
+ pred[0+2][ioff+7] =
+ pred[0+4][ioff+6] =
+ pred[0+6][ioff+5] = (imgpel) ((PredPel[9] + PredPel[10] + 1) >> 1);
+ pred[0+4][ioff+7] =
+ pred[0+6][ioff+6] = (imgpel) ((PredPel[10] + PredPel[11] + 1) >> 1);
+ pred[0+6][ioff+7] = (imgpel) ((PredPel[11] + PredPel[12] + 1) >> 1);
+ pred[0+1][ioff+0] = (imgpel) ((PredPel[1] + PredPel[3] + 2*PredPel[2] + 2) >> 2);
+ pred[0+1][ioff+1] =
+ pred[0+3][ioff+0] = (imgpel) ((PredPel[2] + PredPel[4] + 2*PredPel[3] + 2) >> 2);
+ pred[0+1][ioff+2] =
+ pred[0+3][ioff+1] =
+ pred[0+5][ioff+0] = (imgpel) ((PredPel[3] + PredPel[5] + 2*PredPel[4] + 2) >> 2);
+ pred[0+1][ioff+3] =
+ pred[0+3][ioff+2] =
+ pred[0+5][ioff+1] =
+ pred[0+7][ioff+0] = (imgpel) ((PredPel[4] + PredPel[6] + 2*PredPel[5] + 2) >> 2);
+ pred[0+1][ioff+4] =
+ pred[0+3][ioff+3] =
+ pred[0+5][ioff+2] =
+ pred[0+7][ioff+1] = (imgpel) ((PredPel[5] + PredPel[7] + 2*PredPel[6] + 2) >> 2);
+ pred[0+1][ioff+5] =
+ pred[0+3][ioff+4] =
+ pred[0+5][ioff+3] =
+ pred[0+7][ioff+2] = (imgpel) ((PredPel[6] + PredPel[8] + 2*PredPel[7] + 2) >> 2);
+ pred[0+1][ioff+6] =
+ pred[0+3][ioff+5] =
+ pred[0+5][ioff+4] =
+ pred[0+7][ioff+3] = (imgpel) ((PredPel[7] + PredPel[9] + 2*PredPel[8] + 2) >> 2);
+ pred[0+1][ioff+7] =
+ pred[0+3][ioff+6] =
+ pred[0+5][ioff+5] =
+ pred[0+7][ioff+4] = (imgpel) ((PredPel[8] + PredPel[10] + 2*PredPel[9] + 2) >> 2);
+ pred[0+3][ioff+7] =
+ pred[0+5][ioff+6] =
+ pred[0+7][ioff+5] = (imgpel) ((PredPel[9] + PredPel[11] + 2*PredPel[10] + 2) >> 2);
+ pred[0+5][ioff+7] =
+ pred[0+7][ioff+6] = (imgpel) ((PredPel[10] + PredPel[12] + 2*PredPel[11] + 2) >> 2);
+ pred[0+7][ioff+7] = (imgpel) ((PredPel[11] + PredPel[13] + 2*PredPel[12] + 2) >> 2);
+
+ return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ * Builds the 8x8 Horizontal_Up intra prediction for the block at
+ * (ioff, joff) and writes it into currSlice->mb_pred[pl].
+ *
+ * \return
+ * DECODING_OK in all cases; an unavailable left neighbour is only
+ * reported through a warning on stdout \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_hor_up_pred(Macroblock *currMB, //!< current macroblock
+ ColorPlane pl, //!< current image plane
+ int ioff, //!< pixel offset X within MB
+ int joff) //!< pixel offset Y within MB
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ int i;
+ imgpel PredPel[25]; // array of predictor pels
+ imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY
+h264_imgpel_macroblock_row_t *pred;
+ PixelPos pix_a[8];
+ PixelPos pix_b, pix_c, pix_d;
+
+ int block_available_up;
+ int block_available_left;
+ int block_available_up_left;
+ int block_available_up_right;
+
+ imgpel *pred_pels;
+
+
+ // pix_a[0..7]: the eight positions in the column immediately left of the block
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]);
+
+ // pix_b: position above the block, pix_c: above-right, pix_d: above-left
+ p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b);
+ p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+ p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+
+ // the above-right neighbour is never used for the block at offset (8, 8)
+ pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8);
+
+ if (p_Vid->active_pps->constrained_intra_pred_flag)
+ {
+ // constrained intra prediction: a neighbour only counts when intra_block[] is set for its macroblock
+ for (i=0, block_available_left=1; i<8;i++)
+ block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0;
+ block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+ block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0;
+ block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0;
+ }
+ else
+ {
+ // left availability is taken from the first left position only
+ block_available_left = pix_a[0].available;
+ block_available_up = pix_b.available;
+ block_available_up_right = pix_c.available;
+ block_available_up_left = pix_d.available;
+ }
+
+ // this mode reads the left column; warn (but continue) when it is missing
+ if (!block_available_left)
+ printf ("warning: Intra_8x8_Horizontal_Up prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+
+ // form predictor pels
+ // PredPel layout: [0] above-left, [1..8] above, [9..16] above-right, [17..24] left
+ if (block_available_up)
+ {
+ pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x];
+ PredPel[1] = pred_pels[0];
+ PredPel[2] = pred_pels[1];
+ PredPel[3] = pred_pels[2];
+ PredPel[4] = pred_pels[3];
+ PredPel[5] = pred_pels[4];
+ PredPel[6] = pred_pels[5];
+ PredPel[7] = pred_pels[6];
+ PredPel[8] = pred_pels[7];
+ }
+ else
+ {
+ PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ if (block_available_up_right)
+ {
+ pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x];
+ PredPel[9] = pred_pels[0];
+ PredPel[10] = pred_pels[1];
+ PredPel[11] = pred_pels[2];
+ PredPel[12] = pred_pels[3];
+ PredPel[13] = pred_pels[4];
+ PredPel[14] = pred_pels[5];
+ PredPel[15] = pred_pels[6];
+ PredPel[16] = pred_pels[7];
+
+ }
+ else
+ {
+ // no above-right block: repeat the last pel of the row above
+ PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8];
+ }
+
+ if (block_available_left)
+ {
+ PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+ PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+ PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+ PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+ PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x];
+ PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x];
+ PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x];
+ PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x];
+ }
+ else
+ {
+ PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ if (block_available_up_left)
+ {
+ PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+ }
+ else
+ {
+ PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ // NOTE(review): LowPassForIntra8x8Pred is defined elsewhere; presumably it smooths PredPel in place -- confirm
+ LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+ // pred[r] is row (joff + r) of the prediction buffer of plane pl
+pred = &currSlice->mb_pred[pl][joff];
+ // even columns: rounded average of two adjacent left pels (PredPel[17..24])
+ pred[0+0][ioff+0] = (imgpel) ((PredPel[17] + PredPel[18] + 1) >> 1);
+ pred[0+1][ioff+0] =
+ pred[0+0][ioff+2] = (imgpel) ((PredPel[18] + PredPel[19] + 1) >> 1);
+ pred[0+2][ioff+0] =
+ pred[0+1][ioff+2] =
+ pred[0+0][ioff+4] = (imgpel) ((PredPel[19] + PredPel[20] + 1) >> 1);
+ pred[0+3][ioff+0] =
+ pred[0+2][ioff+2] =
+ pred[0+1][ioff+4] =
+ pred[0+0][ioff+6] = (imgpel) ((PredPel[20] + PredPel[21] + 1) >> 1);
+ pred[0+4][ioff+0] =
+ pred[0+3][ioff+2] =
+ pred[0+2][ioff+4] =
+ pred[0+1][ioff+6] = (imgpel) ((PredPel[21] + PredPel[22] + 1) >> 1);
+ pred[0+5][ioff+0] =
+ pred[0+4][ioff+2] =
+ pred[0+3][ioff+4] =
+ pred[0+2][ioff+6] = (imgpel) ((PredPel[22] + PredPel[23] + 1) >> 1);
+ pred[0+6][ioff+0] =
+ pred[0+5][ioff+2] =
+ pred[0+4][ioff+4] =
+ pred[0+3][ioff+6] = (imgpel) ((PredPel[23] + PredPel[24] + 1) >> 1);
+ // every position with column + 2 * row > 13 is a plain copy of the last left pel
+ pred[0+4][ioff+6] =
+ pred[0+4][ioff+7] =
+ pred[0+5][ioff+4] =
+ pred[0+5][ioff+5] =
+ pred[0+5][ioff+6] =
+ pred[0+5][ioff+7] =
+ pred[0+6][ioff+2] =
+ pred[0+6][ioff+3] =
+ pred[0+6][ioff+4] =
+ pred[0+6][ioff+5] =
+ pred[0+6][ioff+6] =
+ pred[0+6][ioff+7] =
+ pred[0+7][ioff+0] =
+ pred[0+7][ioff+1] =
+ pred[0+7][ioff+2] =
+ pred[0+7][ioff+3] =
+ pred[0+7][ioff+4] =
+ pred[0+7][ioff+5] =
+ pred[0+7][ioff+6] =
+ pred[0+7][ioff+7] = (imgpel) PredPel[24];
+ // positions with column + 2 * row == 13: (1, 3) / 4 weighting of the last two left pels
+ pred[0+6][ioff+1] =
+ pred[0+5][ioff+3] =
+ pred[0+4][ioff+5] =
+ pred[0+3][ioff+7] = (imgpel) ((PredPel[23] + 3*PredPel[24] + 2) >> 2);
+ // remaining odd columns: rounded (1, 2, 1) / 4 weighting of three adjacent left pels
+ pred[0+5][ioff+1] =
+ pred[0+4][ioff+3] =
+ pred[0+3][ioff+5] =
+ pred[0+2][ioff+7] = (imgpel) ((PredPel[24] + PredPel[22] + 2*PredPel[23] + 2) >> 2);
+ pred[0+4][ioff+1] =
+ pred[0+3][ioff+3] =
+ pred[0+2][ioff+5] =
+ pred[0+1][ioff+7] = (imgpel) ((PredPel[23] + PredPel[21] + 2*PredPel[22] + 2) >> 2);
+ pred[0+3][ioff+1] =
+ pred[0+2][ioff+3] =
+ pred[0+1][ioff+5] =
+ pred[0+0][ioff+7] = (imgpel) ((PredPel[22] + PredPel[20] + 2*PredPel[21] + 2) >> 2);
+ pred[0+2][ioff+1] =
+ pred[0+1][ioff+3] =
+ pred[0+0][ioff+5] = (imgpel) ((PredPel[21] + PredPel[19] + 2*PredPel[20] + 2) >> 2);
+ pred[0+1][ioff+1] =
+ pred[0+0][ioff+3] = (imgpel) ((PredPel[20] + PredPel[18] + 2*PredPel[19] + 2) >> 2);
+ pred[0+0][ioff+1] = (imgpel) ((PredPel[19] + PredPel[17] + 2*PredPel[18] + 2) >> 2);
+
+ return DECODING_OK;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ * Builds the 8x8 Horizontal_Down intra prediction for the block at
+ * (ioff, joff) and writes it into currSlice->mb_pred[pl].
+ *
+ * \return
+ * DECODING_OK in all cases; an unavailable upper, left or upper-left
+ * neighbour is only reported through a warning on stdout \n
+ *
+ ***********************************************************************
+ */
+static inline int intra8x8_hor_down_pred(Macroblock *currMB, //!< current macroblock
+ ColorPlane pl, //!< current image plane
+ int ioff, //!< pixel offset X within MB
+ int joff) //!< pixel offset Y within MB
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ int i;
+ imgpel PredPel[25]; // array of predictor pels
+ imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; // For MB level frame/field coding tools -- set default to imgY
+
+ PixelPos pix_a[8];
+ PixelPos pix_b, pix_c, pix_d;
+ h264_imgpel_macroblock_row_t *pred;
+ int block_available_up;
+ int block_available_left;
+ int block_available_up_left;
+ int block_available_up_right;
+
+ imgpel *pred_pels;
+
+
+ // pix_a[0..7]: the eight positions in the column immediately left of the block
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 0, &pix_a[0]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 1, &pix_a[1]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 2, &pix_a[2]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 3, &pix_a[3]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 4, &pix_a[4]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 5, &pix_a[5]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 6, &pix_a[6]);
+ p_Vid->getNeighbourXPLumaNB(currMB, ioff - 1, joff + 7, &pix_a[7]);
+
+ // pix_b: position above the block, pix_c: above-right, pix_d: above-left
+ p_Vid->getNeighbourPXLumaNB(currMB, ioff , joff - 1, &pix_b);
+ p_Vid->getNeighbourPXLuma(currMB, ioff + 8, joff - 1, &pix_c);
+ p_Vid->getNeighbourLuma(currMB, ioff - 1, joff - 1, &pix_d);
+ // the above-right neighbour is never used for the block at offset (8, 8)
+ pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8);
+
+ if (p_Vid->active_pps->constrained_intra_pred_flag)
+ {
+ // constrained intra prediction: a neighbour only counts when intra_block[] is set for its macroblock
+ for (i=0, block_available_left=1; i<8;i++)
+ block_available_left &= pix_a[i].available ? p_Vid->intra_block[pix_a[i].mb_addr]: 0;
+ block_available_up = pix_b.available ? p_Vid->intra_block [pix_b.mb_addr] : 0;
+ block_available_up_right = pix_c.available ? p_Vid->intra_block [pix_c.mb_addr] : 0;
+ block_available_up_left = pix_d.available ? p_Vid->intra_block [pix_d.mb_addr] : 0;
+ }
+ else
+ {
+ // left availability is taken from the first left position only
+ block_available_left = pix_a[0].available;
+ block_available_up = pix_b.available;
+ block_available_up_right = pix_c.available;
+ block_available_up_left = pix_d.available;
+ }
+
+ // this mode reads the row above, the left column and the corner; warn (but continue) when any is missing
+ if ((!block_available_up)||(!block_available_left)||(!block_available_up_left))
+ printf ("warning: Intra_8x8_Horizontal_Down prediction mode not allowed at mb %d\n", (int) p_Vid->current_mb_nr);
+
+ // form predictor pels
+ // PredPel layout: [0] above-left, [1..8] above, [9..16] above-right, [17..24] left
+ if (block_available_up)
+ {
+ pred_pels = &imgY[pix_b.pos_y][pix_b.pos_x];
+ PredPel[1] = pred_pels[0];
+ PredPel[2] = pred_pels[1];
+ PredPel[3] = pred_pels[2];
+ PredPel[4] = pred_pels[3];
+ PredPel[5] = pred_pels[4];
+ PredPel[6] = pred_pels[5];
+ PredPel[7] = pred_pels[6];
+ PredPel[8] = pred_pels[7];
+ }
+ else
+ {
+ PredPel[1] = PredPel[2] = PredPel[3] = PredPel[4] = PredPel[5] = PredPel[6] = PredPel[7] = PredPel[8] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ if (block_available_up_right)
+ {
+ pred_pels = &imgY[pix_c.pos_y][pix_c.pos_x];
+ PredPel[9] = pred_pels[0];
+ PredPel[10] = pred_pels[1];
+ PredPel[11] = pred_pels[2];
+ PredPel[12] = pred_pels[3];
+ PredPel[13] = pred_pels[4];
+ PredPel[14] = pred_pels[5];
+ PredPel[15] = pred_pels[6];
+ PredPel[16] = pred_pels[7];
+
+ }
+ else
+ {
+ // no above-right block: repeat the last pel of the row above
+ PredPel[9] = PredPel[10] = PredPel[11] = PredPel[12] = PredPel[13] = PredPel[14] = PredPel[15] = PredPel[16] = PredPel[8];
+ }
+
+ if (block_available_left)
+ {
+ PredPel[17] = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+ PredPel[18] = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+ PredPel[19] = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+ PredPel[20] = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+ PredPel[21] = imgY[pix_a[4].pos_y][pix_a[4].pos_x];
+ PredPel[22] = imgY[pix_a[5].pos_y][pix_a[5].pos_x];
+ PredPel[23] = imgY[pix_a[6].pos_y][pix_a[6].pos_x];
+ PredPel[24] = imgY[pix_a[7].pos_y][pix_a[7].pos_x];
+ }
+ else
+ {
+ PredPel[17] = PredPel[18] = PredPel[19] = PredPel[20] = PredPel[21] = PredPel[22] = PredPel[23] = PredPel[24] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ if (block_available_up_left)
+ {
+ PredPel[0] = imgY[pix_d.pos_y][pix_d.pos_x];
+ }
+ else
+ {
+ PredPel[0] = (imgpel) p_Vid->dc_pred_value_comp[pl];
+ }
+
+ // NOTE(review): LowPassForIntra8x8Pred is defined elsewhere; presumably it smooths PredPel in place -- confirm
+ LowPassForIntra8x8Pred(&(PredPel[0]), block_available_up_left, block_available_up, block_available_left);
+ // pred[r] is row (joff + r) of the prediction buffer of plane pl
+pred = &currSlice->mb_pred[pl][joff];
+ // even columns: rounded average of two adjacent pels along the corner / left column
+ pred[0][ioff] =
+ pred[0+1][ioff+2] =
+ pred[0+2][ioff+4] =
+ pred[0+3][ioff+6] = (imgpel) ((PredPel[17] + PredPel[0] + 1) >> 1);
+ pred[0+1][ioff] =
+ pred[0+2][ioff+2] =
+ pred[0+3][ioff+4] =
+ pred[0+4][ioff+6] = (imgpel) ((PredPel[18] + PredPel[17] + 1) >> 1);
+ pred[0+2][ioff] =
+ pred[0+3][ioff+2] =
+ pred[0+4][ioff+4] =
+ pred[0+5][ioff+6] = (imgpel) ((PredPel[19] + PredPel[18] + 1) >> 1);
+ pred[0+3][ioff] =
+ pred[0+4][ioff+2] =
+ pred[0+5][ioff+4] =
+ pred[0+6][ioff+6] = (imgpel) ((PredPel[20] + PredPel[19] + 1) >> 1);
+ pred[0+4][ioff] =
+ pred[0+5][ioff+2] =
+ pred[0+6][ioff+4] =
+ pred[0+7][ioff+6] = (imgpel) ((PredPel[21] + PredPel[20] + 1) >> 1);
+ pred[0+5][ioff] =
+ pred[0+6][ioff+2] =
+ pred[0+7][ioff+4] = (imgpel) ((PredPel[22] + PredPel[21] + 1) >> 1);
+ pred[0+6][ioff] =
+ pred[0+7][ioff+2] = (imgpel) ((PredPel[23] + PredPel[22] + 1) >> 1);
+ pred[0+7][ioff] = (imgpel) ((PredPel[24] + PredPel[23] + 1) >> 1);
+ // odd columns: rounded (1, 2, 1) / 4 weighting centred on the corner or a left pel
+ pred[0][ioff+1] =
+ pred[0+1][ioff+3] =
+ pred[0+2][ioff+5] =
+ pred[0+3][ioff+7] = (imgpel) ((PredPel[17] + PredPel[1] + 2*PredPel[0] + 2) >> 2);
+ pred[0+1][ioff+1] =
+ pred[0+2][ioff+3] =
+ pred[0+3][ioff+5] =
+ pred[0+4][ioff+7] = (imgpel) ((PredPel[0] + PredPel[18] + 2*PredPel[17] + 2) >> 2);
+ pred[0+2][ioff+1] =
+ pred[0+3][ioff+3] =
+ pred[0+4][ioff+5] =
+ pred[0+5][ioff+7] = (imgpel) ((PredPel[17] + PredPel[19] + 2*PredPel[18] + 2) >> 2);
+ pred[0+3][ioff+1] =
+ pred[0+4][ioff+3] =
+ pred[0+5][ioff+5] =
+ pred[0+6][ioff+7] = (imgpel) ((PredPel[18] + PredPel[20] + 2*PredPel[19] + 2) >> 2);
+ pred[0+4][ioff+1] =
+ pred[0+5][ioff+3] =
+ pred[0+6][ioff+5] =
+ pred[0+7][ioff+7] = (imgpel) ((PredPel[19] + PredPel[21] + 2*PredPel[20] + 2) >> 2);
+ pred[0+5][ioff+1] =
+ pred[0+6][ioff+3] =
+ pred[0+7][ioff+5] = (imgpel) ((PredPel[20] + PredPel[22] + 2*PredPel[21] + 2) >> 2);
+ pred[0+6][ioff+1] =
+ pred[0+7][ioff+3] = (imgpel) ((PredPel[21] + PredPel[23] + 2*PredPel[22] + 2) >> 2);
+ pred[0+7][ioff+1] = (imgpel) ((PredPel[22] + PredPel[24] + 2*PredPel[23] + 2) >> 2);
+ // upper-right part (rows 0..2): (1, 2, 1) / 4 weighting over the corner and the row above
+ pred[0][ioff+2] =
+ pred[0+1][ioff+4] =
+ pred[0+2][ioff+6] = (imgpel) ((PredPel[0] + PredPel[2] + 2*PredPel[1] + 2) >> 2);
+ pred[0][ioff+3] =
+ pred[0+1][ioff+5] =
+ pred[0+2][ioff+7] = (imgpel) ((PredPel[1] + PredPel[3] + 2*PredPel[2] + 2) >> 2);
+ pred[0][ioff+4] =
+ pred[0+1][ioff+6] = (imgpel) ((PredPel[2] + PredPel[4] + 2*PredPel[3] + 2) >> 2);
+ pred[0][ioff+5] =
+ pred[0+1][ioff+7] = (imgpel) ((PredPel[3] + PredPel[5] + 2*PredPel[4] + 2) >> 2);
+ pred[0][ioff+6] = (imgpel) ((PredPel[4] + PredPel[6] + 2*PredPel[5] + 2) >> 2);
+ pred[0][ioff+7] = (imgpel) ((PredPel[5] + PredPel[7] + 2*PredPel[6] + 2) >> 2);
+
+ return DECODING_OK;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Intra 8x8 prediction entry point. Looks up the prediction mode
+ *    stored in p_Vid->ipredmode for the block at (ioff, joff), records
+ *    it in currMB->ipmode_DPCM and runs the matching predictor.
+ *
+ * \par Input:
+ *    Starting point of current 8x8 block image position
+ *
+ * \return
+ *    the status returned by the selected predictor, or SEARCH_SYNC
+ *    when the stored mode is none of the nine intra 8x8 modes \n
+ *
+ ************************************************************************
+ */
+int intrapred8x8(Macroblock *currMB,    //!< Current Macroblock
+                 ColorPlane pl,         //!< Current color plane
+                 int ioff,              //!< ioff
+                 int joff)              //!< joff
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  // ipredmode is indexed in 4x4 block units, hence the shift by two
+  int mode_col = currMB->block_x + (ioff >> 2);
+  int mode_row = currMB->block_y + (joff >> 2);
+  byte predmode = p_Vid->ipredmode[mode_row][mode_col];
+  int status;
+
+  currMB->ipmode_DPCM = predmode; // For residual DPCM
+
+  switch (predmode)
+  {
+  case DC_PRED:
+    status = intra8x8_dc_pred(currMB, pl, ioff, joff);
+    break;
+  case VERT_PRED:
+    status = intra8x8_vert_pred(currMB, pl, ioff, joff);
+    break;
+  case HOR_PRED:
+    status = intra8x8_hor_pred(currMB, pl, ioff, joff);
+    break;
+  case DIAG_DOWN_RIGHT_PRED:
+    status = intra8x8_diag_down_right_pred(currMB, pl, ioff, joff);
+    break;
+  case DIAG_DOWN_LEFT_PRED:
+    status = intra8x8_diag_down_left_pred(currMB, pl, ioff, joff);
+    break;
+  case VERT_RIGHT_PRED:
+    status = intra8x8_vert_right_pred(currMB, pl, ioff, joff);
+    break;
+  case VERT_LEFT_PRED:
+    status = intra8x8_vert_left_pred(currMB, pl, ioff, joff);
+    break;
+  case HOR_UP_PRED:
+    status = intra8x8_hor_up_pred(currMB, pl, ioff, joff);
+    break;
+  case HOR_DOWN_PRED:
+    status = intra8x8_hor_down_pred(currMB, pl, ioff, joff);
+    break;
+  default:
+    printf("Error: illegal intra_8x8 prediction mode: %d\n", (int) predmode);
+    status = SEARCH_SYNC;
+    break;
+  }
+
+  return status;
+}
+
+
diff --git a/Src/h264dec/ldecod/src/intra_chroma_pred.c b/Src/h264dec/ldecod/src/intra_chroma_pred.c
new file mode 100644
index 00000000..5c45ec6b
--- /dev/null
+++ b/Src/h264dec/ldecod/src/intra_chroma_pred.c
@@ -0,0 +1,357 @@
+/*!
+*************************************************************************************
+* \file intra_chroma_pred.c
+*
+* \brief
+* Functions for intra chroma prediction
+*
+* \author
+* Main contributors (see contributors.h for copyright,
+* address and affiliation details)
+* - Alexis Michael Tourapis <alexismt@ieee.org>
+*
+*************************************************************************************
+*/
+#include "global.h"
+#include "block.h"
+#include "mb_access.h"
+#include "image.h"
+
+/*!
+************************************************************************
+* \brief
+*    DC value of one 4x4 chroma block taken from a single edge.
+*    The four pels above are used when up_avail is set and either
+*    direction is non-zero or the left edge is unavailable; otherwise
+*    the four left pels starting at left[blk_y] are used when left_avail
+*    is set. *pred is left untouched when neither edge can be used.
+************************************************************************
+*/
+static void intra_chroma_DC_single(imgpel **curr_img, int up_avail, int left_avail, PixelPos up, PixelPos left[17], int blk_x, int blk_y, int *pred, int direction )
+{
+  if (up_avail && (direction || !left_avail))
+  {
+    const imgpel *top = &curr_img[up.pos_y][up.pos_x + blk_x];
+
+    *pred = (top[0] + top[1] + top[2] + top[3] + 2) >> 2;
+  }
+  else if (left_avail)
+  {
+    int sum = 0;
+    int k;
+
+    for (k = 0; k < 4; ++k)
+      sum += curr_img[left[blk_y + k].pos_y][left[blk_y + k].pos_x];
+
+    *pred = (sum + 2) >> 2;
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    DC value of one 4x4 chroma block taken from both edges.
+*    Averages the four pels above and the four left pels starting at
+*    left[blk_y]; when only one edge is available that edge alone is
+*    averaged. *pred is left untouched when neither edge is available.
+************************************************************************
+*/
+static void intra_chroma_DC_all(imgpel **curr_img, int up_avail, int left_avail, PixelPos up, PixelPos left[17], int blk_x, int blk_y, int *pred )
+{
+  int top_sum = 0;
+  int left_sum = 0;
+
+  if (up_avail)
+  {
+    const imgpel *top = &curr_img[up.pos_y][up.pos_x + blk_x];
+
+    top_sum = top[0] + top[1] + top[2] + top[3];
+  }
+
+  if (left_avail)
+  {
+    int k;
+
+    for (k = 0; k < 4; ++k)
+      left_sum += curr_img[left[blk_y + k].pos_y][left[blk_y + k].pos_x];
+  }
+
+  if (up_avail)
+  {
+    if (left_avail)
+      *pred = (top_sum + left_sum + 4) >> 3;
+    else
+      *pred = (top_sum + 2) >> 2;
+  }
+  else if (left_avail)
+  {
+    *pred = (left_sum + 2) >> 2;
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+* Chroma Intra prediction. Note that many operations can be moved
+* outside since they are repeated for both components for no reason.
+************************************************************************
+*/
+
+/*!
+************************************************************************
+* \brief
+*    Fills a square block of prediction samples with the value pred,
+*    starting at row 0 / column offset_x of mb_pred. The generic path
+*    covers BLOCK_SIZE x BLOCK_SIZE samples; the x86 path issues one
+*    32-bit store for each of the first four rows.
+************************************************************************
+*/
+static void memset_4x4(h264_imgpel_macroblock_row_t *mb_pred, int offset_x, int pred)
+{
+#ifdef _M_IX86
+  // benski> hand-written 32-bit stores; the loop the compiler generated here was poor.
+  // The byte replication is done in unsigned arithmetic: as a signed multiply,
+  // pred * 0x01010101 overflows int once pred reaches 128.
+  // NOTE(review): four bytes are written per row, so this path assumes a one-byte imgpel -- confirm.
+  unsigned int dword_pred = (unsigned int) pred * 0x01010101u;
+  mb_pred = (h264_imgpel_macroblock_row_t *)&mb_pred[0][offset_x];
+  *(unsigned int *)mb_pred[0] = dword_pred;
+  *(unsigned int *)mb_pred[1] = dword_pred;
+  *(unsigned int *)mb_pred[2] = dword_pred;
+  *(unsigned int *)mb_pred[3] = dword_pred;
+#else
+  int ii, jj;
+  for (jj = 0; jj < BLOCK_SIZE; jj++)
+  {
+    for (ii = 0; ii < BLOCK_SIZE; ii++)
+    {
+      mb_pred[jj][offset_x+ii]=(imgpel) pred;
+    }
+  }
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    Chroma DC prediction: computes a separate DC value for every 4x4
+*    sub-block of the chroma macroblock and fills mb_pred with it.
+*    Note that unlike what is stated in many presentations and papers,
+*    this mode does not operate the same way as I_16x16 DC prediction.
+************************************************************************
+*/
+static void chroma_dc_pred8(VideoParameters *p_Vid, int yuv, imgpel **imgUV, int up_avail, int left_avail[2], PixelPos up, PixelPos left[17], h264_imgpel_macroblock_row_t *mb_pred)
+{
+  // rule selector per 4x4 sub-block: 0 = TOP LEFT, 1 = TOP RIGHT, 2 = BOTTOM LEFT, 3 = BOTTOM RIGHT
+  static const byte block_pos[3][4][4]= //[yuv][b8][b4]
+  {
+    { {0, 1, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0},{0, 0, 0, 0}},
+    { {0, 1, 2, 3},{2, 3, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0}},
+    { {0, 1, 2, 3},{1, 1, 3, 3},{2, 3, 2, 3},{3, 3, 3, 3}}
+  };
+
+  int blk8, blk4;
+
+  for (blk8 = 0; blk8 < p_Vid->num_uv_blocks; ++blk8)
+  {
+    for (blk4 = 0; blk4 < 4; ++blk4)
+    {
+      const int position = block_pos[yuv][blk8][blk4];
+      const int off_x = subblk_offset_x[yuv][blk8][blk4];
+      const int off_y = subblk_offset_y[yuv][blk8][blk4];
+      // selectors 0/1 use left_avail[0], selectors 2/3 use left_avail[1]
+      const int left_ok = left_avail[position >> 1];
+      // default value, kept when the helper finds no usable edge
+      int dc = p_Vid->dc_pred_value_comp[1];
+
+      // left[0] is the corner position, so the left pels of this block start at off_y + 1
+      if (position == 0 || position == 3)
+        intra_chroma_DC_all(imgUV, up_avail, left_ok, up, left, off_x, off_y + 1, &dc);
+      else // selector 1 prefers the top edge (direction 1), selector 2 the left edge (direction 0)
+        intra_chroma_DC_single(imgUV, up_avail, left_ok, up, left, off_x, off_y + 1, &dc, position == 1);
+
+      memset_4x4(mb_pred + off_y, off_x, dc);
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Horizontal chroma prediction: every row j of mb_pred is filled with
+*    the pel at left[1 + j]. Rows are 8 pels wide when cr_MB_x is 8 and
+*    16 pels wide otherwise (cr_MB_x is then asserted to be 16).
+************************************************************************
+*/
+static void chroma_pred_horiz8(int cr_MB_x, int cr_MB_y, PixelPos left[17], imgpel **imgUV, h264_imgpel_macroblock_row_t *mb_pred)
+{
+  int width = 8;
+  int row, col;
+
+  if (cr_MB_x != 8)
+  {
+    assert(cr_MB_x == 16);
+    width = 16;
+  }
+
+  for (row = 0; row < cr_MB_y; ++row)
+  {
+    // left[0] is the corner position, so row 'row' takes its value from left[row + 1]
+    const PixelPos *src = &left[1 + row];
+    const imgpel value = (imgpel) imgUV[src->pos_y][src->pos_x];
+
+    for (col = 0; col < width; ++col)
+      mb_pred[row][col] = value;
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Vertical chroma prediction: copies the row of pels starting at
+*    position 'up' into each of the cr_MB_y rows of mb_pred. 8 pels are
+*    copied per row when cr_MB_x is 8 and 16 otherwise (cr_MB_x is then
+*    asserted to be 16).
+************************************************************************
+*/
+static void chroma_pred_vert8(int cr_MB_x, int cr_MB_y, PixelPos up, imgpel **imgUV, h264_imgpel_macroblock_row_t *mb_pred)
+{
+  const imgpel *top_row = &(imgUV[up.pos_y][up.pos_x]);
+  int width = 8;
+  int row;
+
+  if (cr_MB_x != 8)
+  {
+    assert(cr_MB_x == 16);
+    width = 16;
+  }
+
+  for (row = 0; row < cr_MB_y; ++row)
+    memcpy(mb_pred[row], top_row, width * sizeof(imgpel));
+}
+
+/*!
+************************************************************************
+* \brief
+*    Plane chroma prediction: derives a horizontal and a vertical
+*    gradient from the row above ('up') and the left column ('left',
+*    with left[0] being the corner position) and fills mb_pred with the
+*    resulting plane, clipped through iClip1 against max_imgpel_value.
+************************************************************************
+*/
+static void chroma_pred_plane8(int cr_MB_x, int cr_MB_y, int cr_MB_x2, int cr_MB_y2, PixelPos up, PixelPos left[17], int max_imgpel_value, imgpel **imgUV, h264_imgpel_macroblock_row_t *mb_pred)
+{
+  const imgpel *top = &imgUV[up.pos_y][up.pos_x];
+  const int corner = imgUV[left[0].pos_y][left[0].pos_x];
+  const int top_right = top[cr_MB_x - 1];
+  const int bottom_left = imgUV[left[cr_MB_y].pos_y][left[cr_MB_y].pos_x];
+  int grad_h = cr_MB_x2 * (top_right - corner);
+  int grad_v = cr_MB_y2 * (bottom_left - corner);
+  int slope_h, slope_v, base;
+  int k, row, col;
+
+  // weighted differences of pels mirrored around the middle of the top row
+  for (k = 1; k < cr_MB_x2; ++k)
+    grad_h += k * (top[cr_MB_x2 + k - 1] - top[cr_MB_x2 - 1 - k]);
+
+  // same for the left column
+  for (k = 1; k < cr_MB_y2; ++k)
+    grad_v += k * (imgUV[left[cr_MB_y2 + k].pos_y][left[cr_MB_y2 + k].pos_x] -
+                   imgUV[left[cr_MB_y2 - k].pos_y][left[cr_MB_y2 - k].pos_x]);
+
+  // scale factor and shift depend on whether the dimension is 8 or not
+  if (cr_MB_x == 8)
+    slope_h = (17 * grad_h + 2 * cr_MB_x) >> 5;
+  else
+    slope_h = (5 * grad_h + 2 * cr_MB_x) >> 6;
+
+  if (cr_MB_y == 8)
+    slope_v = (17 * grad_v + 2 * cr_MB_y) >> 5;
+  else
+    slope_v = (5 * grad_v + 2 * cr_MB_y) >> 6;
+
+  base = 16 * (bottom_left + top_right);
+
+  for (row = 0; row < cr_MB_y; ++row)
+  {
+    // part of the plane equation that is constant along one row (rounding offset included)
+    const int row_term = base + (row - cr_MB_y2 + 1) * slope_v + 16;
+
+    for (col = 0; col < cr_MB_x; ++col)
+      mb_pred[row][col] = (imgpel) iClip1(max_imgpel_value, ((row_term + (col - cr_MB_x2 + 1) * slope_h) >> 5));
+  }
+}
+
+// TODO: benski> replace with PredictIntraChroma8x8_H264 ?
+/*!
+************************************************************************
+* \brief
+*    Chroma intra prediction for chroma component uv of the current
+*    macroblock. The mode is taken from currMB->c_ipred_mode and the
+*    prediction is written to currSlice->mb_pred[uv + 1].
+*    VERT_PRED_8 is handled first because it only needs the row above;
+*    all other modes also fetch the left column.
+************************************************************************
+*/
+void intrapred_chroma(Macroblock *currMB, int uv)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  imgpel **imgUV = dec_picture->imgUV[uv]->img;
+  h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[uv + 1];
+
+  PixelPos up;        //!< pixel position p(0,-1)
+  int up_avail;
+
+  int cr_MB_x = p_Vid->mb_cr_size_x;
+  int cr_MB_y = p_Vid->mb_cr_size_y;
+
+  if (currMB->c_ipred_mode == VERT_PRED_8)
+  {
+    p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &up);
+
+    // with constrained intra prediction the neighbour must also be flagged in intra_block[]
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+      up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
+    else
+      up_avail = up.available;
+
+    // Vertical Prediction
+    if (!up_avail)
+      error("unexpected VERT_PRED_8 chroma intra prediction mode",-1);
+
+    chroma_pred_vert8(cr_MB_x, cr_MB_y, up, imgUV, mb_pred);
+  }
+  else
+  {
+    PixelPos left[17];  //!< pixel positions p(-1, -1..16)
+    int left_avail[2], left_up_avail;
+    int max_imgpel_value = p_Vid->max_pel_value_comp[uv + 1];
+    int yuv = dec_picture->chroma_format_idc - 1;
+    int cr_MB_y2 = (cr_MB_y >> 1);
+    int cr_MB_x2 = (cr_MB_x >> 1);
+    // left[1..8] are always fetched, left[9..16] only for 16 row chroma blocks
+    int last_left = (cr_MB_y == 16) ? 16 : 8;
+    int i;
+
+    // left[0] is the corner p(-1,-1), left[1] the first pel of the left column
+    p_Vid->getNeighbourNX(currMB, -1, p_Vid->mb_size[IS_CHROMA], &left[0]);
+    p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[IS_CHROMA], &left[1]);
+
+    for (i = 2; i <= last_left; ++i)
+      p_Vid->getNeighbourNPChromaNB(currMB, i - 1, p_Vid->mb_size[IS_CHROMA], &left[i]);
+
+    p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[IS_CHROMA], &up);
+
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+    {
+      // every used neighbour must belong to a macroblock flagged in intra_block[];
+      // the left column is checked separately for its upper and lower half
+      up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
+
+      left_avail[0] = 1;
+      for (i = 0; i < cr_MB_y2; ++i)
+        left_avail[0] &= left[i + 1].available ? p_Vid->intra_block[left[i + 1].mb_addr]: 0;
+
+      left_avail[1] = 1;
+      for (i = cr_MB_y2; i < cr_MB_y; ++i)
+        left_avail[1] &= left[i + 1].available ? p_Vid->intra_block[left[i + 1].mb_addr]: 0;
+
+      left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0;
+    }
+    else
+    {
+      up_avail = up.available;
+      left_avail[0] = left_avail[1] = left[1].available;
+      left_up_avail = left[0].available;
+    }
+
+    switch (currMB->c_ipred_mode)
+    {
+    case DC_PRED_8:
+      chroma_dc_pred8(p_Vid, yuv, imgUV, up_avail, left_avail, up, left, mb_pred);
+      break;
+    case HOR_PRED_8:
+      // Horizontal Prediction
+      if (!left_avail[0] || !left_avail[1])
+        error("unexpected HOR_PRED_8 chroma intra prediction mode",-1);
+
+      chroma_pred_horiz8(cr_MB_x, cr_MB_y, left, imgUV, mb_pred);
+      break;
+    case PLANE_8:
+      // plane prediction
+      if (!left_up_avail || !left_avail[0] || !left_avail[1] || !up_avail)
+        error("unexpected PLANE_8 chroma intra prediction mode",-1);
+      else
+        chroma_pred_plane8(cr_MB_x, cr_MB_y, cr_MB_x2, cr_MB_y2, up, left, max_imgpel_value, imgUV, mb_pred);
+      break;
+    default:
+      error("illegal chroma intra prediction mode", 600);
+      break;
+    }
+  }
+}
+
+
+
+
diff --git a/Src/h264dec/ldecod/src/ldecod.c b/Src/h264dec/ldecod/src/ldecod.c
new file mode 100644
index 00000000..39919e1e
--- /dev/null
+++ b/Src/h264dec/ldecod/src/ldecod.c
@@ -0,0 +1,639 @@
+
+/*!
+ ***********************************************************************
+ * \mainpage
+ * This is the H.264/AVC decoder reference software. For detailed documentation
+ * see the comments in each file.
+ *
+ * The JM software web site is located at:
+ * http://iphome.hhi.de/suehring/tml
+ *
+ * For bug reporting and known issues see:
+ * https://ipbt.hhi.de
+ *
+ * \author
+ * The main contributors are listed in contributors.h
+ *
+ * \version
+ * JM 16.1 (FRExt)
+ *
+ * \note
+ * tags are used for document system "doxygen"
+ * available at http://www.doxygen.org
+ */
+/*!
+ * \file
+ * ldecod.c
+ * \brief
+ * H.264/AVC reference decoder project main()
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Inge Lille-Langøy <inge.lille-langoy@telenor.com>
+ * - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se>
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ * - Jani Lainema <jani.lainema@nokia.com>
+ * - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de>
+ * - Byeong-Moon Jeon <jeonbm@lge.com>
+ * - Gabi Blaettermann
+ * - Ye-Kui Wang <wyk@ieee.org>
+ * - Valeri George <george@hhi.de>
+ * - Karsten Suehring <suehring@hhi.de>
+ *
+ ***********************************************************************
+ */
+
+#include "contributors.h"
+
+#include <sys/stat.h>
+
+#include "global.h"
+#include "image.h"
+#include "memalloc.h"
+#include "mc_prediction.h"
+#include "mbuffer.h"
+#include "leaky_bucket.h"
+#include "fmo.h"
+#include "output.h"
+#include "cabac.h"
+#include "parset.h"
+#include "sei.h"
+#include "erc_api.h"
+#include "quant.h"
+#include "block.h"
+#include "nalu.h"
+#include "meminput.h"
+#define LOGFILE "log.dec"
+#define DATADECFILE "dataDec.txt"
+#define TRACEFILE "trace_dec.txt"
+
+// Decoder definition. This should be the only global variable in the entire
+// software. Global variables should be avoided.
+char errortext[ET_SIZE]; //!< buffer for error message for exit with error()
+
+#ifdef TRACE
+FILE *p_trace=0;
+int bitcounter=0;
+#endif
+
+// Prototypes of static functions
+void init (VideoParameters *p_Vid);
+void malloc_slice(InputParameters *p_Inp, VideoParameters *p_Vid);
+void free_slice (Slice *currSlice);
+
+void init_frext(VideoParameters *p_Vid);
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Error handling procedure. Reports the error by calling
+ *    RaiseException() instead of printing to stderr and calling exit()
+ *    (the previous print/flush/exit sequence is kept below as
+ *    commented-out code).
+ * \param text
+ *    Error message. The pointer is cast to ULONG_PTR* and passed as the
+ *    exception argument array, with an argument count of 1.
+ * \param code
+ *    Passed as the first argument of RaiseException(), i.e. used as the
+ *    exception code rather than as a process exit code.
+ * \note
+ *    NOTE(review): presumably this is the Win32 RaiseException(); with a
+ *    flags value of 0 a handler may be able to resume execution, in which
+ *    case this function returns to its caller - confirm callers cope.
+ *    NOTE(review): the string's own bytes are used as the argument array
+ *    (the pointer is not stored in a one-element array) - confirm the
+ *    exception handler expects that layout.
+ ************************************************************************
+ */
+void error(char *text, int code)
+{
+ RaiseException(code, 0, 1, (ULONG_PTR *)text);
+ //fprintf(stderr, "%s\n", text);
+ //flush_dpb(p_Dec->p_Vid);
+ //exit(code);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Link the input-parameter structure to the video parameters and
+ *    load the fixed decoder defaults.
+ * \param p_Vid
+ *    Video parameters; receives the back-pointer to p_Inp
+ * \param p_Inp
+ *    Input parameters to be initialised
+ ***********************************************************************
+ */
+void Configure(VideoParameters *p_Vid, InputParameters *p_Inp)
+{
+#ifdef _LEAKYBUCKET_
+  // file where the Leaky Bucket parameters (computed by the encoder) are stored
+  strcpy(p_Inp->LeakyBucketParamFile, "leakybucketparam.cfg");
+  p_Inp->F_decoder = 73000;    //! Decoder initial delay
+  p_Inp->B_decoder = 104000;   //! Decoder buffer size
+  p_Inp->R_decoder = 500000;   //! Decoder rate
+#endif
+
+  p_Inp->intra_profile_deblocking = 0;
+  p_Vid->p_Inp = p_Inp;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Allocate a zero-initialised VideoParameters structure together with
+ *    its old-slice record, its decoded picture buffer and (when
+ *    ENABLE_OUTPUT_TONEMAPPING is set) its tone-mapping SEI record.
+ *    Any failed allocation is reported through no_mem_exit().
+ * \par Output:
+ *    *p_Vid receives the new structure
+ ***********************************************************************
+ */
+static void alloc_img( VideoParameters **p_Vid)
+{
+  VideoParameters *vid = (VideoParameters *) calloc(1, sizeof(VideoParameters));
+
+  // publish the pointer first so the caller sees the result of the allocation
+  *p_Vid = vid;
+  if (vid == NULL)
+    no_mem_exit("alloc_img: p_Vid");
+
+  vid->old_slice = (OldSliceParams *) calloc(1, sizeof(OldSliceParams));
+  if (vid->old_slice == NULL)
+    no_mem_exit("alloc_img: p_Vid->old_slice");
+
+  vid->p_Dpb = (DecodedPictureBuffer*) calloc(1, sizeof(DecodedPictureBuffer));
+  if (vid->p_Dpb == NULL)
+    no_mem_exit("alloc_img: p_Vid->p_Dpb");
+
+  // nothing has been initialised yet
+  vid->p_Dpb->init_done = 0;
+  vid->global_init_done = 0;
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+  vid->seiToneMapping = (ToneMappingSEI*) calloc(1, sizeof(ToneMappingSEI));
+  if (vid->seiToneMapping == NULL)
+    no_mem_exit("alloc_img: (*p_Vid)->seiToneMapping");
+#endif
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Allocate a zero-initialised InputParameters structure; a failed
+ *    allocation is reported through no_mem_exit().
+ * \par Output:
+ *    *p_Inp receives the new structure
+ ***********************************************************************
+ */
+static void alloc_params( InputParameters **p_Inp )
+{
+  *p_Inp = (InputParameters *) calloc(1, sizeof(InputParameters));
+
+  if (*p_Inp == NULL)
+    no_mem_exit("alloc_params: p_Inp");
+}
+
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Allocate the decoder structure and its video/input parameter
+ *    sub-structures. With TRACE defined the trace file handle and the
+ *    bit counter are reset as well.
+ * \return
+ *    The new decoder, or NULL when the top-level allocation failed
+ ***********************************************************************
+ */
+DecoderParams *alloc_decoder()
+{
+  DecoderParams *dec = (DecoderParams *) calloc(1, sizeof(DecoderParams));
+
+  if (dec == NULL)
+    return NULL;
+
+  alloc_img(&dec->p_Vid);
+  alloc_params(&dec->p_Inp);
+#ifdef TRACE
+  p_trace = 0;
+  bitcounter = 0;
+#endif
+
+  return dec;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Release a VideoParameters structure: its memory input state, the
+ *    tone-mapping SEI record (when ENABLE_OUTPUT_TONEMAPPING is set),
+ *    the decoded picture buffer record, the old-slice record and finally
+ *    the structure itself. A NULL argument is ignored.
+ * \par Input:
+ *    Image Parameters VideoParameters *p_Vid
+ ***********************************************************************
+ */
+void free_img( VideoParameters *p_Vid)
+{
+  if (p_Vid == NULL)
+    return;
+
+  free_mem_input(p_Vid);
+
+  // free() accepts NULL, so the members need no individual guards
+#if (ENABLE_OUTPUT_TONEMAPPING)
+  free (p_Vid->seiToneMapping);
+  p_Vid->seiToneMapping = NULL;
+#endif
+
+  free (p_Vid->p_Dpb);
+  p_Vid->p_Dpb = NULL;
+
+  free (p_Vid->old_slice);
+  p_Vid->old_slice = NULL;
+
+  free (p_Vid);
+}
+/*!
+ ***********************************************************************
+ * \brief
+ * main function for TML decoder
+ * \note
+ *    The whole function is compiled out by the surrounding #if 0 and is
+ *    kept only as a reference for the start-up / decode / tear-down
+ *    order: allocate the decoder, configure it, open the bitstream,
+ *    allocate the slice, init(), decode frames until EOS, then release
+ *    everything.
+ *    The call to Configure() below passes four arguments, whereas
+ *    Configure() as defined in this file takes two, so this block would
+ *    have to be updated before it could be re-enabled.
+ ***********************************************************************
+ */
+#if 0
+int main(int argc, char **argv)
+{
+ DecoderParams *p_Dec = alloc_decoder();
+ if (!p_Dec)
+ return 1;
+
+ Configure(p_Dec->p_Vid, p_Dec->p_Inp, argc, argv);
+
+ initBitsFile(p_Dec->p_Vid, p_Dec->p_Inp->FileFormat);
+
+ p_Dec->p_Vid->bitsfile->OpenBitsFile(p_Dec->p_Vid, p_Dec->p_Inp->infile);
+
+ // Allocate Slice data struct
+ malloc_slice(p_Dec->p_Inp, p_Dec->p_Vid);
+ init_old_slice(p_Dec->p_Vid->old_slice);
+
+ init(p_Dec->p_Vid);
+
+ init_out_buffer(p_Dec->p_Vid);
+
+ while (decode_one_frame(p_Dec->p_Vid) != EOS)
+ ;
+
+ free_slice(p_Dec->p_Vid->currentSlice);
+ FmoFinit(p_Dec->p_Vid);
+
+ free_global_buffers(p_Dec->p_Vid);
+ flush_dpb(p_Dec->p_Vid);
+
+#if (PAIR_FIELDS_IN_OUTPUT)
+ flush_pending_output(p_Dec->p_Vid, p_Dec->p_Vid->p_out);
+#endif
+
+ p_Dec->p_Vid->bitsfile->CloseBitsFile(p_Dec->p_Vid);
+
+ close(p_Dec->p_Vid->p_out);
+
+ if (p_Dec->p_Vid->p_ref != -1)
+ close(p_Dec->p_Vid->p_ref);
+
+#if TRACE
+ fclose(p_trace);
+#endif
+
+ ercClose(p_Dec->p_Vid, p_Dec->p_Vid->erc_errorVar);
+
+ CleanUpPPS(p_Dec->p_Vid);
+ free_dpb(p_Dec->p_Vid);
+ uninit_out_buffer(p_Dec->p_Vid);
+
+ free (p_Dec->p_Inp);
+ free_img (p_Dec->p_Vid);
+ free(p_Dec);
+
+ return 0;
+}
+#endif
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Initialize some arrays and reset the decoder state kept in p_Vid:
+ *    recovery-point tracking, picture counters, reference flags, slice
+ *    group maps, access-unit bookkeeping and the output buffers.
+ *    With ENABLE_OUTPUT_TONEMAPPING set, the tone-mapping SEI record is
+ *    initialised as well.
+ * \param p_Vid
+ *    image parameters
+ ***********************************************************************
+ */
+void init(VideoParameters *p_Vid) //!< image parameters
+{
+  int i;
+
+  p_Vid->oldFrameSizeInMbs = -1;
+
+  p_Vid->recovery_point = 0;
+  p_Vid->recovery_point_found = 0;
+  p_Vid->recovery_poc = 0x7fffffff; /* set to a max value */
+
+  p_Vid->number = 0;
+  p_Vid->type = I_SLICE;
+
+  p_Vid->dec_ref_pic_marking_buffer = NULL;
+
+  p_Vid->dec_picture = NULL;
+
+  // reference flag initialization: all 17 entries start out set
+  for (i = 0; i < 17; ++i)
+  {
+    p_Vid->ref_flag[i] = 1;
+  }
+
+  p_Vid->MbToSliceGroupMap = NULL;
+  p_Vid->MapUnitToSliceGroupMap = NULL;
+
+  p_Vid->LastAccessUnitExists = 0;
+  p_Vid->NALUCount = 0;
+
+  p_Vid->out_buffer = NULL;
+  p_Vid->pending_output = NULL;
+  p_Vid->pending_output_state = FRAME;
+  p_Vid->recovery_flag = 0;
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+  init_tone_mapping_sei(p_Vid->seiToneMapping);
+#endif
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Initialize the FREXT variables derived from the bit depths and the
+ *    chroma format of the active SPS: QP scales, DC prediction values,
+ *    maximum sample values, chroma block counts, macroblock sizes per
+ *    component and the chroma sub-pel masks and shifts.
+ * \param p_Vid
+ *    image parameters; active_sps and the bit depths must already be set
+ ***********************************************************************
+ */
+void init_frext(VideoParameters *p_Vid) //!< image parameters
+{
+  const int chroma_fmt = p_Vid->active_sps->chroma_format_idc;
+
+  // luma: pel bitdepth init
+  p_Vid->bitdepth_luma_qp_scale = 6 * (p_Vid->bitdepth_luma - 8);
+  p_Vid->dc_pred_value_comp[0]  = 1 << (p_Vid->bitdepth_luma - 1);
+  p_Vid->max_pel_value_comp[0]  = (1 << p_Vid->bitdepth_luma) - 1;
+  p_Vid->mb_size[IS_LUMA][1]    = MB_BLOCK_SIZE;
+  p_Vid->mb_size[IS_LUMA][0]    = p_Vid->mb_size[IS_LUMA][1];
+
+  if (chroma_fmt == YUV400)
+  {
+    // monochrome: no chroma blocks at all
+    p_Vid->bitdepth_chroma_qp_scale = 0;
+    p_Vid->max_pel_value_comp[1] = 0;
+    p_Vid->max_pel_value_comp[2] = 0;
+    p_Vid->num_blk8x8_uv = 0;
+    p_Vid->num_uv_blocks = 0;
+    p_Vid->num_cdc_coeff = 0;
+
+    p_Vid->mb_cr_size_x = 0;
+    p_Vid->mb_size[2][0] = p_Vid->mb_cr_size_x;
+    p_Vid->mb_size[IS_CHROMA][0] = p_Vid->mb_size[2][0];
+
+    p_Vid->mb_cr_size_y = 0;
+    p_Vid->mb_size[2][1] = p_Vid->mb_cr_size_y;
+    p_Vid->mb_size[IS_CHROMA][1] = p_Vid->mb_size[2][1];
+
+    p_Vid->subpel_x = 0;
+    p_Vid->subpel_y = 0;
+    p_Vid->shiftpel_x = 0;
+    p_Vid->shiftpel_y = 0;
+  }
+  else
+  {
+    // chroma width is 8 for 4:2:0 and 4:2:2, height is 16 for 4:4:4 and 4:2:2
+    const int cr_width_is_8   = (chroma_fmt == YUV420 || chroma_fmt == YUV422);
+    const int cr_height_is_16 = (chroma_fmt == YUV444 || chroma_fmt == YUV422);
+
+    // for chrominance part
+    p_Vid->bitdepth_chroma_qp_scale = 6 * (p_Vid->bitdepth_chroma - 8);
+    p_Vid->dc_pred_value_comp[1] = (1 << (p_Vid->bitdepth_chroma - 1));
+    p_Vid->dc_pred_value_comp[2] = p_Vid->dc_pred_value_comp[1];
+    p_Vid->max_pel_value_comp[1] = (1 << p_Vid->bitdepth_chroma) - 1;
+    p_Vid->max_pel_value_comp[2] = (1 << p_Vid->bitdepth_chroma) - 1;
+    p_Vid->num_blk8x8_uv = (1 << chroma_fmt) & (~(0x1));
+    p_Vid->num_uv_blocks = (p_Vid->num_blk8x8_uv >> 1);
+    p_Vid->num_cdc_coeff = (p_Vid->num_blk8x8_uv << 1);
+
+    p_Vid->mb_cr_size_x = cr_width_is_8 ? 8 : 16;
+    p_Vid->mb_size[2][0] = p_Vid->mb_cr_size_x;
+    p_Vid->mb_size[IS_CHROMA][0] = p_Vid->mb_size[2][0];
+
+    p_Vid->mb_cr_size_y = cr_height_is_16 ? 16 : 8;
+    p_Vid->mb_size[2][1] = p_Vid->mb_cr_size_y;
+    p_Vid->mb_size[IS_CHROMA][1] = p_Vid->mb_size[2][1];
+
+    p_Vid->subpel_x   = (p_Vid->mb_cr_size_x == 8) ? 7 : 3;
+    p_Vid->subpel_y   = (p_Vid->mb_cr_size_y == 8) ? 7 : 3;
+    p_Vid->shiftpel_x = (p_Vid->mb_cr_size_x == 8) ? 3 : 2;
+    p_Vid->shiftpel_y = (p_Vid->mb_cr_size_y == 8) ? 3 : 2;
+  }
+
+  // macroblock size in units of 4x4 blocks
+  p_Vid->mb_size_blk[0][1] = p_Vid->mb_size[0][0] >> 2;
+  p_Vid->mb_size_blk[0][0] = p_Vid->mb_size_blk[0][1];
+  p_Vid->mb_size_blk[2][0] = p_Vid->mb_size[1][0] >> 2;
+  p_Vid->mb_size_blk[1][0] = p_Vid->mb_size_blk[2][0];
+  p_Vid->mb_size_blk[2][1] = p_Vid->mb_size[1][1] >> 2;
+  p_Vid->mb_size_blk[1][1] = p_Vid->mb_size_blk[2][1];
+
+  // shift amounts taken from CeilLog2_sf() of the macroblock size
+  p_Vid->mb_size_shift[0][1] = CeilLog2_sf (p_Vid->mb_size[0][0]);
+  p_Vid->mb_size_shift[0][0] = p_Vid->mb_size_shift[0][1];
+  p_Vid->mb_size_shift[2][0] = CeilLog2_sf (p_Vid->mb_size[1][0]);
+  p_Vid->mb_size_shift[1][0] = p_Vid->mb_size_shift[2][0];
+  p_Vid->mb_size_shift[2][1] = CeilLog2_sf (p_Vid->mb_size[1][1]);
+  p_Vid->mb_size_shift[1][1] = p_Vid->mb_size_shift[2][1];
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocates a stand-alone array of data partitions, each with its own
+ *    zero-initialised Bitstream. The array should be released with
+ *    FreePartition().
+ *
+ * \par Input:
+ *    n: number of partitions in the array
+ * \par return
+ *    pointer to the DataPartition array, zero-initialised; allocation
+ *    failures are reported through error() with code 100
+ ************************************************************************
+ */
+
+DataPartition *AllocPartition(int n)
+{
+  int idx = 0;
+  DataPartition *parts = (DataPartition *) calloc(n, sizeof(DataPartition));
+
+  if (parts == NULL)
+  {
+    snprintf(errortext, ET_SIZE, "AllocPartition: Memory allocation for Data Partition failed");
+    error(errortext, 100);
+  }
+
+  // give every partition its own bitstream record
+  while (idx < n)
+  {
+    Bitstream *bs = (Bitstream *) calloc(1, sizeof(Bitstream));
+
+    parts[idx].bitstream = bs;
+    if (bs == NULL)
+    {
+      snprintf(errortext, ET_SIZE, "AllocPartition: Memory allocation for Bitstream failed");
+      error(errortext, 100);
+    }
+    bs->streamBuffer = 0;
+    ++idx;
+  }
+
+  return parts;
+}
+
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Frees a partition array: the Bitstream record of each of the n
+ *    partitions, then the array itself. The stream buffers referenced by
+ *    the bitstreams are not freed here.
+ *
+ * \par Input:
+ *    Partition array to be freed, size of the array (number of partitions)
+ *
+ * \par return
+ *    None
+ *
+ * \note
+ *    n must be the same as for the corresponding call of AllocPartition
+ ************************************************************************
+ */
+
+
+void FreePartition (DataPartition *dp, int n)
+{
+  int part = 0;
+
+  assert (dp != NULL);
+  assert (dp->bitstream != NULL);
+
+  while (part < n)
+  {
+    free (dp[part].bitstream);
+    ++part;
+  }
+
+  free (dp);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocates the slice structure (32-byte aligned, zero-filled) along
+ *    with its dependent data structures: the motion and texture context
+ *    models and an array of three data partitions. The result is stored
+ *    in p_Vid->currentSlice.
+ *
+ * \par Input:
+ *    Input Parameters InputParameters *p_Inp (not used by this function),
+ *    VideoParameters *p_Vid
+ *
+ * \note
+ *    If the allocation fails, error() is called; should error() return,
+ *    p_Vid->currentSlice is left NULL and nothing else is set up.
+ ************************************************************************
+ */
+void malloc_slice(InputParameters *p_Inp, VideoParameters *p_Vid)
+{
+  Slice *currSlice;
+
+  p_Vid->currentSlice = (Slice *) _aligned_malloc(sizeof(Slice), 32);
+  if ( (currSlice = p_Vid->currentSlice) == NULL)
+  {
+    error("Memory allocation for Slice datastruct failed",100);
+    return; // never touch the NULL slice if error() hands control back
+  }
+  memset(currSlice, 0, sizeof(Slice));
+
+  //! the entropy coding mode is not known here, hence the CABAC context
+  //! models are always created
+  currSlice->mot_ctx = create_contexts_MotionInfo();
+  currSlice->tex_ctx = create_contexts_TextureInfo();
+
+  currSlice->max_part_nr = 3;  //! assume data partitioning (worst case) for the following mallocs()
+  currSlice->partArr = AllocPartition(currSlice->max_part_nr);
+  currSlice->p_colocated = NULL;
+
+  currSlice->coeff_ctr = -1;
+  currSlice->pos = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Releases a Slice structure and its dependent data structures: the
+ *    three data partitions, both sets of context models and the aligned
+ *    slice memory itself.
+ *
+ * \par Input:
+ *    Slice *currSlice: slice to be released
+ ************************************************************************
+ */
+void free_slice(Slice *currSlice)
+{
+  // the partition array is always created with three entries
+  FreePartition (currSlice->partArr, 3);
+
+  // delete all context models
+  delete_contexts_MotionInfo(currSlice->mot_ctx);
+  delete_contexts_TextureInfo(currSlice->tex_ctx);
+
+  _aligned_free(currSlice);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Dynamic memory allocation of frame size related global buffers.
+ *    Buffers are defined in global.h, allocated memory must be freed in
+ *    void free_global_buffers(). If the buffers were already set up
+ *    (global_init_done), they are released first.
+ *
+ * \par Input:
+ *    Image Parameters VideoParameters *p_Vid
+ *
+ * \par Output:
+ *    Sum of the sizes returned by get_mem2D() and get_mem2Dint(); the
+ *    buffers obtained directly from calloc()/_aligned_malloc() are not
+ *    included in that figure.
+ *
+ * \note
+ *    Every allocation made here is checked; a failure is reported
+ *    through no_mem_exit().
+ ***********************************************************************
+ */
+int init_global_buffers(VideoParameters *p_Vid)
+{
+  int memory_size=0;
+  int i;
+
+  if (p_Vid->global_init_done)
+  {
+    free_global_buffers(p_Vid);
+  }
+
+  // allocate memory in structure p_Vid
+  if( IS_INDEPENDENT(p_Vid) )
+  {
+    // one macroblock array per colour plane
+    for( i=0; i<MAX_PLANE; ++i )
+    {
+      if(((p_Vid->mb_data_JV[i]) = (Macroblock *) calloc(p_Vid->FrameSizeInMbs, sizeof(Macroblock))) == NULL)
+        no_mem_exit("init_global_buffers: p_Vid->mb_data");
+    }
+    p_Vid->mb_data = NULL;
+  }
+  else
+  {
+    if(((p_Vid->mb_data) = (Macroblock *) calloc(p_Vid->FrameSizeInMbs, sizeof(Macroblock))) == NULL)
+      no_mem_exit("init_global_buffers: p_Vid->mb_data");
+  }
+
+  if(((p_Vid->intra_block) = (int*)calloc(p_Vid->FrameSizeInMbs, sizeof(int))) == NULL)
+    no_mem_exit("init_global_buffers: p_Vid->intra_block");
+
+  //! Helper array to access macroblock positions. We add 1 to also consider last MB.
+  p_Vid->PicPos = (h264_pic_position *)calloc(p_Vid->FrameSizeInMbs + 1, sizeof(h264_pic_position));
+  if (p_Vid->PicPos == NULL)
+    no_mem_exit("init_global_buffers: p_Vid->PicPos");
+
+  for (i = 0; i < (int) p_Vid->FrameSizeInMbs + 1;++i)
+  {
+    p_Vid->PicPos[i][0] = (i % p_Vid->PicWidthInMbs);
+    p_Vid->PicPos[i][1] = (i / p_Vid->PicWidthInMbs);
+  }
+
+  memory_size += get_mem2D(&(p_Vid->ipredmode), 4*p_Vid->FrameHeightInMbs, 4*p_Vid->PicWidthInMbs);
+
+  // CAVLC mem
+  p_Vid->nz_coeff = (h264_nz_coefficient *)_aligned_malloc(p_Vid->FrameSizeInMbs*sizeof(h264_nz_coefficient), 32);
+  if (p_Vid->nz_coeff == NULL)
+    no_mem_exit("init_global_buffers: p_Vid->nz_coeff");
+  memset(p_Vid->nz_coeff, 0, p_Vid->FrameSizeInMbs*sizeof(h264_nz_coefficient));
+
+  memory_size += get_mem2Dint(&(p_Vid->siblock), p_Vid->FrameHeightInMbs, p_Vid->PicWidthInMbs);
+
+  init_qp_process(p_Vid);
+
+  p_Vid->global_init_done = 1;
+
+  p_Vid->oldFrameSizeInMbs = p_Vid->FrameSizeInMbs;
+
+  return (memory_size);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * Free allocated memory of frame size related global buffers
+ * buffers are defined in global.h, allocated memory is allocated in
+ * int init_global_buffers()
+ *
+ * \par Input:
+ * Input Parameters InputParameters *p_Inp, Image Parameters VideoParameters *p_Vid
+ *
+ * \par Output:
+ * none
+ *
+ * \note
+ * Releases nz_coeff, siblock, mb_data (when non-NULL), PicPos,
+ * intra_block, ipredmode and the QP matrices, then clears
+ * global_init_done so that init_global_buffers() can run again.
+ * NOTE(review): the per-plane arrays mb_data_JV[] that
+ * init_global_buffers() allocates in the IS_INDEPENDENT case are not
+ * released in this function - confirm whether they are freed elsewhere
+ * or whether mb_data aliases one of them at this point.
+ *
+ ************************************************************************
+ */
+void free_global_buffers(VideoParameters *p_Vid)
+{
+ // CAVLC free mem
+ _aligned_free(p_Vid->nz_coeff);
+
+ free_mem2Dint(p_Vid->siblock);
+
+ // free mem, allocated for structure p_Vid
+ if (p_Vid->mb_data != NULL)
+ free(p_Vid->mb_data);
+
+ free(p_Vid->PicPos);
+
+ free (p_Vid->intra_block);
+ free_mem2D(p_Vid->ipredmode);
+
+ free_qp_matrices(p_Vid);
+
+ p_Vid->global_init_done = 0;
+
+}
diff --git a/Src/h264dec/ldecod/src/loopFilter.c b/Src/h264dec/ldecod/src/loopFilter.c
new file mode 100644
index 00000000..c1c19a2f
--- /dev/null
+++ b/Src/h264dec/ldecod/src/loopFilter.c
@@ -0,0 +1,1338 @@
+
+/*!
+*************************************************************************************
+* \file loopFilter.c
+*
+* \brief
+* Filter to reduce blocking artifacts on a macroblock level.
+* The filter strength is QP dependent.
+*
+* \author
+* Contributors:
+* - Peter List Peter.List@t-systems.de: Original code (13-Aug-2001)
+* - Jani Lainema Jani.Lainema@nokia.com: Some bug fixing, removal of recursiveness (16-Aug-2001)
+* - Peter List Peter.List@t-systems.de: inplace filtering and various simplifications (10-Jan-2002)
+* - Anthony Joch anthony@ubvideo.com: Simplified switching between filters and
+* non-recursive default filter. (08-Jul-2002)
+* - Cristina Gomila cristina.gomila@thomson.net: Simplification of the chroma deblocking
+* from JVT-E089 (21-Nov-2002)
+* - Alexis Michael Tourapis atour@dolby.com: Speed/Architecture improvements (08-Feb-2007)
+*************************************************************************************
+*/
+
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+#include "loopfilter.h"
+
+/*********************************************************************************************************/
+
+// NOTE: In principle, the alpha and beta tables are calculated with the formulas below
+// Alpha( qp ) = 0.8 * (2^(qp/6) - 1)
+// Beta ( qp ) = 0.5 * qp - 7
+
+// The tables actually used have been "hand optimized" though (by Anthony Joch). So, the
+// table values might differ slightly from the formula-generated values. Also, the first
+// few values of both tables are set to zero to force the filter off at low QPs.
+
+static const byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ;
+static const byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ;
+static const byte CLIP_TAB[52][5] =
+{
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1},
+ { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3},
+ { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6},
+ { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16},
+ { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25}
+} ;
+
+static const char chroma_edge[2][4][4] = //[dir][edge][yuv_format]
+{ { {-4, 0, 0, 0},
+{-4,-4,-4, 4},
+{-4, 4, 4, 8},
+{-4,-4,-4, 12}},
+
+{ {-4, 0, 0, 0},
+{-4,-4, 4, 4},
+{-4, 4, 8, 8},
+{-4,-4, 12, 12}}};
+
+static const int pelnum_cr[2][4] = {{0,8,16,16}, {0,8, 8,16}}; //[dir:0=vert, 1=hor.][yuv_format]
+
+/* YUV420 & non-aff optimized functions */
+void EdgeLoopLuma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, PixelPos pixMB, Macroblock *MbP);
+void EdgeLoopLuma_Horiz_YUV420(VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, PixelPos pixMB, Macroblock *MbP);
+void EdgeLoopLumaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p);
+void EdgeLoopChroma_Vert_YUV420(VideoImage *image, const uint8_t Strength[4], Macroblock *MbQ, int uv, PixelPos pixMB, Macroblock *MbP);
+void EdgeLoopChroma_Horiz_YUV420(VideoImage *image, const byte Strength[4], Macroblock *MbQ, int uv, PixelPos pixMB, Macroblock *MbP);
+void EdgeLoopChromaMBAff_Vert_YUV420(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p);
+void GetStrength_Vert_YUV420(uint8_t Strength[4], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP);
+void GetStrength_Vert_YUV420_All(uint8_t Strength[4][4], Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag);
+void GetStrength_Horiz_YUV420(uint8_t Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP);
+void GetStrength_Horiz_YUV420_All(uint8_t Strength[4][4], Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag);
+void GetStrength_MBAff_Vert_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p);
+void GetStrengthMBAff_Horiz_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p);
+static void Deblock_YUV420(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr);
+static void Deblock_YUV420_MBAFF(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr);
+/* */
+void EdgeLoopChromaNormal_Vert(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, int uv, StorablePicture *p);
+void EdgeLoopLumaNormal_Vert(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, StorablePicture *p);
+void GetStrengthNormal_Vert(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p);
+void GetStrengthNormal_Horiz(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p);
+static void GetStrengthNormal (byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int dir,int edge, int mvlimit,StorablePicture *p);
+static void GetStrengthMBAff (byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int dir,int edge, int mvlimit,StorablePicture *p);
+static void EdgeLoopLumaNormal(ColorPlane pl, struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, StorablePicture *p);
+static void EdgeLoopLumaMBAff (ColorPlane pl, struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, StorablePicture *p);
+static void EdgeLoopChromaNormal(struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p);
+static void EdgeLoopChromaMBAff(struct video_image *image, const byte Strength[MB_BLOCK_SIZE],Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p);
+static void DeblockMb(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr);
+static void EdgeLoopLumaMBAff_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p);
+static void EdgeLoopLumaMBAff_Vert(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p);
+
+/*!
+*****************************************************************************************
+* \brief
+*    Filter all macroblocks in order of increasing macroblock address.
+*    4:2:0 pictures use the dedicated non-MBAFF or MBAFF routines when the
+*    matching neighbour function is installed; every other case goes
+*    through DeblockMb() with the strength / edge-loop callbacks selected
+*    according to the picture's mb_aff_frame_flag.
+*****************************************************************************************
+*/
+void DeblockPicture(VideoParameters *p_Vid, StorablePicture *p)
+{
+  unsigned mb;
+  const int is_yuv420 = (p_Vid->active_sps->chroma_format_idc == YUV420);
+
+  // fast path: 4:2:0, no MBAFF, no mixed frame/field edges
+  if (is_yuv420 && !p->mb_aff_frame_flag && p_Vid->getNeighbour == getNonAffNeighbour && !p_Vid->mixedModeEdgeFlag)
+  {
+    for (mb = 0; mb < p->PicSizeInMbs; ++mb)
+      Deblock_YUV420( p_Vid, p, mb );
+    return;
+  }
+
+  // fast path: 4:2:0 with MBAFF
+  if (is_yuv420 && p->mb_aff_frame_flag && p_Vid->getNeighbour == getAffNeighbour)
+  {
+    for (mb = 0; mb < p->PicSizeInMbs; ++mb)
+      Deblock_YUV420_MBAFF( p_Vid, p, mb );
+    return;
+  }
+
+  // generic path: pick the callbacks once, then filter every macroblock
+  if (p->mb_aff_frame_flag == 1)
+  {
+    p_Vid->GetStrength    = GetStrengthMBAff;
+    p_Vid->EdgeLoopLuma   = EdgeLoopLumaMBAff;
+    p_Vid->EdgeLoopChroma = EdgeLoopChromaMBAff;
+  }
+  else
+  {
+    p_Vid->GetStrength    = GetStrengthNormal;
+    p_Vid->EdgeLoopLuma   = EdgeLoopLumaNormal;
+    p_Vid->EdgeLoopChroma = EdgeLoopChromaNormal;
+  }
+
+  for (mb = 0; mb < p->PicSizeInMbs; ++mb)
+    DeblockMb( p_Vid, p, mb );
+}
+
+
+/*!
+*****************************************************************************************
+* \brief
+* Deblocking filter for one macroblock.
+*
+* Generic routine: strengths and edge filters are reached through the
+* p_Vid->GetStrength, p_Vid->EdgeLoopLuma and p_Vid->EdgeLoopChroma callbacks.
+*
+* \param p_Vid
+* video parameters (macroblock data, active SPS, callbacks, DeblockCall state)
+* \param p
+* picture whose planes are filtered in place
+* \param MbQAddr
+* address of the macroblock to filter (index into p_Vid->mb_data)
+*
+* Outline:
+* - return at once, with DeblockCall = 0, when the macroblock's DFDisableIdc is 1
+* - the left/top macroblock edge is filtered only when the macroblock is not in the
+*   first column/row; with DFDisableIdc == 2 the mb_avail_left / mb_avail_up flags
+*   decide instead
+* - for both directions and each of the four edges the strength bytes are computed,
+*   and the luma and chroma filters run only if at least one strength byte is non-zero
+* - edges 1 and 3 are skipped for luma when the 8x8 transform is in use
+* - a chroma edge is filtered only when its chroma_edge[][][] table entry is >= 0
+* - with mixedModeEdgeFlag set, a frame macroblock gets an extra pass on its top
+*   horizontal edge with DeblockCall = 2
+*
+* NOTE(review): imgUV is a local array, so the (imgUV != NULL) tests below are
+* always true.
+*****************************************************************************************
+*/
+
+static void DeblockMb(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr)
+{
+ int EdgeCondition;
+ int dir, edge;
+ __declspec(align(32)) byte Strength[16];
+ short mb_x, mb_y;
+
+ int filterNon8x8LumaEdgesFlag[4] = {1,1,1,1};
+ int filterLeftMbEdgeFlag;
+ int filterTopMbEdgeFlag;
+ int fieldModeMbFlag;
+ int mvlimit = 4;
+ int i, StrengthSum;
+ Macroblock *MbQ = &(p_Vid->mb_data[MbQAddr]) ; // current Mb
+ VideoImage *imgY = p->imgY;
+ VideoImage *imgUV[2] = {p->imgUV[0], p->imgUV[1]};
+
+ int edge_cr;
+
+ // return, if filter is disabled
+ if (MbQ->DFDisableIdc==1)
+ {
+ p_Vid->DeblockCall = 0;
+ return;
+ }
+ p_Vid->DeblockCall = 1;
+ get_mb_pos (p_Vid, MbQAddr, p_Vid->mb_size[IS_LUMA], &mb_x, &mb_y);
+
+ filterLeftMbEdgeFlag = (mb_x != 0);
+ filterTopMbEdgeFlag = (mb_y != 0);
+
+ if (MbQ->mb_type == I8MB)
+ assert(MbQ->luma_transform_size_8x8_flag);
+
+ filterNon8x8LumaEdgesFlag[1] =
+ filterNon8x8LumaEdgesFlag[3] = !(MbQ->luma_transform_size_8x8_flag); // inner 4x4 edges are skipped with the 8x8 transform
+
+ if (p->mb_aff_frame_flag && mb_y == MB_BLOCK_SIZE && MbQ->mb_field)
+ filterTopMbEdgeFlag = 0;
+
+ fieldModeMbFlag = (p->structure!=FRAME) || (p->mb_aff_frame_flag && MbQ->mb_field);
+ if (fieldModeMbFlag)
+ mvlimit = 2; // field macroblocks use a motion vector limit of 2 instead of 4
+
+ if (MbQ->DFDisableIdc==2)
+ {
+ // don't filter at slice boundaries
+ filterLeftMbEdgeFlag = MbQ->mb_avail_left;
+ // if this the bottom of a frame macroblock pair then always filter the top edge
+ filterTopMbEdgeFlag = (p->mb_aff_frame_flag && !MbQ->mb_field && (MbQAddr & 0x01)) ? 1 : MbQ->mb_avail_up;
+ }
+
+ CheckAvailabilityOfNeighbors(MbQ);
+
+ for( dir = 0 ; dir < 2 ; ++dir ) // filter first vertical edges, followed by horizontal
+ {
+ EdgeCondition = (dir && filterTopMbEdgeFlag) || (!dir && filterLeftMbEdgeFlag); // can not filter beyond picture boundaries
+ for( edge=0; edge<4 ; ++edge ) // first 4 vertical strips of 16 pel
+ { // then 4 horizontal
+ if( edge || EdgeCondition )
+ {
+ edge_cr = chroma_edge[dir][edge][p->chroma_format_idc]; // negative entry: no chroma edge here for this format
+
+ p_Vid->GetStrength(Strength, MbQ, dir, edge << 2, mvlimit, p); // Strength for 4 blks in 1 stripe
+ StrengthSum = Strength[0];
+ for (i = 1; i < MB_BLOCK_SIZE && StrengthSum == 0 ; ++i) // stops at the first non-zero strength
+ {
+ StrengthSum += (int) Strength[i];
+ }
+
+ if( StrengthSum ) // only if one of the 16 Strength bytes is != 0
+ {
+ if (filterNon8x8LumaEdgesFlag[edge])
+ {
+ p_Vid->EdgeLoopLuma( PLANE_Y, imgY, Strength, MbQ, dir, edge << 2, p) ;
+ if( p_Vid->active_sps->chroma_format_idc==YUV444 && !IS_INDEPENDENT(p_Vid) ) // 4:4:4: U and V go through the luma filter
+ {
+ p_Vid->EdgeLoopLuma(PLANE_U, imgUV[0], Strength, MbQ, dir, edge << 2, p);
+ p_Vid->EdgeLoopLuma(PLANE_V, imgUV[1], Strength, MbQ, dir, edge << 2, p);
+ }
+ }
+ if (p_Vid->active_sps->chroma_format_idc==YUV420 || p_Vid->active_sps->chroma_format_idc==YUV422)
+ {
+ if( (imgUV != NULL) && (edge_cr >= 0))
+ {
+ p_Vid->EdgeLoopChroma( imgUV[0], Strength, MbQ, dir, edge_cr, 0, p);
+ p_Vid->EdgeLoopChroma( imgUV[1], Strength, MbQ, dir, edge_cr, 1, p);
+ }
+ }
+ }
+
+ if (dir && !edge && !MbQ->mb_field && p_Vid->mixedModeEdgeFlag)
+ {
+ // this is the extra horizontal edge between a frame macroblock pair and a field above it
+ p_Vid->DeblockCall = 2;
+ p_Vid->GetStrength(Strength, MbQ, 1, MB_BLOCK_SIZE, mvlimit, p); // Strength for 4 blks in 1 stripe
+ //if( *((int*)Strength) ) // only if one of the 4 Strength bytes is != 0
+ {
+ if (filterNon8x8LumaEdgesFlag[edge])
+ {
+ p_Vid->EdgeLoopLuma(PLANE_Y, imgY, Strength, MbQ, dir, MB_BLOCK_SIZE, p) ;
+ if( p_Vid->active_sps->chroma_format_idc==YUV444 && !IS_INDEPENDENT(p_Vid) )
+ {
+ p_Vid->EdgeLoopLuma(PLANE_U, imgUV[0], Strength, MbQ, dir, MB_BLOCK_SIZE, p) ;
+ p_Vid->EdgeLoopLuma(PLANE_V, imgUV[1], Strength, MbQ, dir, MB_BLOCK_SIZE, p) ;
+ }
+ }
+ if (p_Vid->active_sps->chroma_format_idc==YUV420 || p_Vid->active_sps->chroma_format_idc==YUV422)
+ {
+ if( (imgUV != NULL) && (edge_cr >= 0))
+ {
+ p_Vid->EdgeLoopChroma( imgUV[0], Strength, MbQ, dir, MB_BLOCK_SIZE, 0, p) ;
+ p_Vid->EdgeLoopChroma( imgUV[1], Strength, MbQ, dir, MB_BLOCK_SIZE, 1, p) ;
+ }
+ }
+ }
+ p_Vid->DeblockCall = 1; // back to the normal call state after the extra edge
+ }
+ }
+ }//end edge
+ }//end loop dir
+
+ p_Vid->DeblockCall = 0;
+}
+
+
+/*!
+*****************************************************************************************
+* \brief
+* Deblocks one macroblock of a YUV420 picture using the MBAFF strength and edge-loop
+* routines.
+*
+* \param p_Vid
+* decoder context; mb_data, PicPos, DeblockCall and mixedModeEdgeFlag are accessed
+* \param p
+* picture whose imgY and imgUV[0..1] planes are filtered in place
+* \param MbQAddr
+* index of the current macroblock in p_Vid->mb_data
+*
+* \note
+* Returns at once when DFDisableIdc == 1. Otherwise the four vertical edges are
+* processed first, then the four horizontal edges. DeblockCall is 1 while filtering,
+* 2 during the extra mixed-mode horizontal edge, and is reset to 0 on exit.
+*****************************************************************************************
+*/
+static void Deblock_YUV420_MBAFF(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr)
+{
+ int EdgeCondition;
+ int edge;
+ __declspec(align(32)) byte Strength[16];
+ short mb_x, mb_y;
+
+ int filterNon8x8LumaEdgesFlag[4] = {1,1,1,1}; // per-edge luma enable; entries 1 and 3 are cleared below for 8x8-transform MBs
+ int filterLeftMbEdgeFlag;
+ int filterTopMbEdgeFlag;
+ int fieldModeMbFlag;
+ int mvlimit = 4;
+ int i, StrengthSum;
+ Macroblock *MbQ = &(p_Vid->mb_data[MbQAddr]) ; // current Mb
+ VideoImage *imgY = p->imgY;
+ VideoImage *imgUV[2] = {p->imgUV[0], p->imgUV[1]};
+
+ int edge_cr;
+
+ // return, if filter is disabled
+ if (MbQ->DFDisableIdc==1)
+ {
+ p_Vid->DeblockCall = 0;
+ return;
+ }
+ p_Vid->DeblockCall = 1;
+ get_mb_block_pos_mbaff(p_Vid->PicPos, MbQAddr, &mb_x, &mb_y);
+
+ filterLeftMbEdgeFlag = (mb_x != 0); // nothing to the left of column 0
+ filterTopMbEdgeFlag = (mb_y != 0); // nothing above row 0
+
+ if (MbQ->mb_type == I8MB)
+ assert(MbQ->luma_transform_size_8x8_flag);
+
+ filterNon8x8LumaEdgesFlag[1] =
+ filterNon8x8LumaEdgesFlag[3] = !(MbQ->luma_transform_size_8x8_flag);
+
+ if (1 && mb_y == 1 && MbQ->mb_field) // NOTE(review): presumably the bottom MB of the first pair row in field mode, whose field neighbour above lies outside the picture - confirm
+ filterTopMbEdgeFlag = 0;
+
+ fieldModeMbFlag = (p->structure!=FRAME) || MbQ->mb_field;
+ if (fieldModeMbFlag)
+ mvlimit = 2; // tighter mv[1] difference threshold for field pictures / field MBs
+
+ if (MbQ->DFDisableIdc==2)
+ {
+ // don't filter at slice boundaries
+ filterLeftMbEdgeFlag = MbQ->mb_avail_left;
+ // if this the bottom of a frame macroblock pair then always filter the top edge
+ filterTopMbEdgeFlag = (1 && !MbQ->mb_field && (MbQAddr & 0x01)) ? 1 : MbQ->mb_avail_up;
+ }
+
+ CheckAvailabilityOfNeighbors(MbQ); // called after mb_avail_left / mb_avail_up were read above
+
+
+ EdgeCondition = filterLeftMbEdgeFlag; // can not filter beyond picture boundaries
+ for( edge=0; edge<4 ; ++edge ) // first 4 vertical strips of 16 pel
+ { // then 4 horizontal
+ if( edge || EdgeCondition ) // inner edges always; edge 0 only when the left MB edge may be filtered
+ {
+ edge_cr = chroma_edge[0][edge][YUV420];
+
+ GetStrength_MBAff_Vert_YUV420(Strength, MbQ, edge << 2, mvlimit, p); // Strength for 4 blks in 1 stripe
+ StrengthSum = Strength[0];
+ for (i = 1; i < MB_BLOCK_SIZE && StrengthSum == 0 ; ++i) // stops at the first non-zero strength
+ {
+ StrengthSum += (int) Strength[i];
+ }
+
+ if( StrengthSum ) // only if one of the 16 Strength bytes is != 0
+ {
+ if (filterNon8x8LumaEdgesFlag[edge])
+ {
+ EdgeLoopLumaMBAff_Vert_YUV420(imgY, Strength, MbQ, edge << 2, p) ;
+ }
+ if( (imgUV != NULL) && (edge_cr >= 0)) // imgUV is a local array, so the NULL test is always true; edge_cr comes from the chroma_edge table
+ {
+ EdgeLoopChromaMBAff_Vert_YUV420( imgUV[0], Strength, MbQ, edge_cr, 0, p);
+ EdgeLoopChromaMBAff_Vert_YUV420( imgUV[1], Strength, MbQ, edge_cr, 1, p);
+ }
+ }
+ }
+ }//end edge
+
+ EdgeCondition = filterTopMbEdgeFlag; // can not filter beyond picture boundaries
+ for( edge=0; edge<4 ; ++edge ) // 4 horizontal strips of 16 pel
+ { // (the vertical edges were handled by the loop above)
+ if( edge || EdgeCondition ) // inner edges always; edge 0 only when the top MB edge may be filtered
+ {
+ edge_cr = chroma_edge[1][edge][YUV420];
+
+ GetStrengthMBAff_Horiz_YUV420(Strength, MbQ, edge << 2, mvlimit, p); // Strength for 4 blks in 1 stripe
+ StrengthSum = Strength[0];
+ for (i = 1; i < MB_BLOCK_SIZE && StrengthSum == 0 ; ++i) // stops at the first non-zero strength
+ {
+ StrengthSum += (int) Strength[i];
+ }
+
+ if( StrengthSum ) // only if one of the 16 Strength bytes is != 0
+ {
+ if (filterNon8x8LumaEdgesFlag[edge])
+ {
+ EdgeLoopLumaMBAff_Horiz( PLANE_Y, imgY, Strength, MbQ, edge << 2, p) ;
+ }
+ if( (imgUV != NULL) && (edge_cr >= 0)) // imgUV is a local array, so the NULL test is always true
+ {
+ EdgeLoopChromaMBAff( imgUV[0], Strength, MbQ, 1, edge_cr, 0, p);
+ EdgeLoopChromaMBAff( imgUV[1], Strength, MbQ, 1, edge_cr, 1, p);
+ }
+ }
+ if (!edge && !MbQ->mb_field && p_Vid->mixedModeEdgeFlag) // mixedModeEdgeFlag is read here as left in p_Vid by the strength call above - presumably that call sets it; confirm
+ {
+ // this is the extra horizontal edge between a frame macroblock pair and a field above it
+ p_Vid->DeblockCall = 2;
+ GetStrengthMBAff(Strength, MbQ, 1, MB_BLOCK_SIZE, mvlimit, p); // Strength for 4 blks in 1 stripe
+ //if( *((int*)Strength) ) // only if one of the 4 Strength bytes is != 0
+ {
+ EdgeLoopLumaMBAff_Horiz(PLANE_Y, imgY, Strength, MbQ, MB_BLOCK_SIZE, p) ; // edge == MB_BLOCK_SIZE is mapped to row 1 inside the callee
+
+ EdgeLoopChromaMBAff( imgUV[0], Strength, MbQ, 1, MB_BLOCK_SIZE, 0, p) ;
+ EdgeLoopChromaMBAff( imgUV[1], Strength, MbQ, 1, MB_BLOCK_SIZE, 1, p) ;
+
+ }
+ p_Vid->DeblockCall = 1;
+ }
+ }
+ }//end edge
+
+
+ p_Vid->DeblockCall = 0;
+}
+
+
+/*!
+*****************************************************************************************
+* \brief
+* Deblocks one macroblock of a non-MBAFF YUV420 picture with the IPP deblocking
+* primitives: all vertical edges first, then all horizontal edges.
+*
+* \param p_Vid
+* decoder context; mb_data, PicPos and DeblockCall are accessed
+* \param p
+* picture whose imgY and imgUV[0..1] planes are filtered in place
+* \param MbQAddr
+* index of the current macroblock in p_Vid->mb_data
+*
+* \note
+* Index [0] of alphas / betas (and of the chroma variants) holds the value derived
+* from the QP averaged with the neighbouring macroblock, index [1] the value derived
+* from the current macroblock's own QP.
+*
+* \note
+* NOTE(review): when no neighbour is used (MbP == 0) the [0] entries and
+* thresholds.as8[0..3] are not written in that pass and are passed on as they are
+* (stale or uninitialised). Presumably harmless because the strength routine returns
+* zero strengths for that edge - confirm.
+*****************************************************************************************
+*/
+static void Deblock_YUV420(VideoParameters *p_Vid, StorablePicture *p, int MbQAddr)
+{
+ __declspec(align(32)) union
+ {
+ uint32_t as32[4];
+ uint8_t as8[16];
+ uint8_t edge[4][4];
+ } strength; // 16 strength bytes, also viewable as 4 edges x 4 blocks
+ uint8_t alphas[2], alphas_chroma[2][2]; // chroma arrays are indexed [uv][0 or 1]
+ uint8_t betas[2], betas_chroma[2][2];
+ __declspec(align(32)) union
+ {
+ uint32_t as32[4];
+ uint8_t as8[16];
+ } thresholds; // clip-table values selected by the strength bytes
+ short mb_x, mb_y;
+ int filterLeftMbEdgeFlag;
+ int filterTopMbEdgeFlag;
+ int mvlimit = 4;
+ Macroblock *MbQ = &(p_Vid->mb_data[MbQAddr]) ; // current Mb
+ Macroblock *MbP=0;
+ VideoImage *imgY = p->imgY;
+ imgpel *YQ, *UQ, *VQ;
+ VideoImage *imgUV[2] = {p->imgUV[0], p->imgUV[1]};
+ int QPQ = MbQ->qp;
+ int indexAQ = iClip3(0, MAX_QP, QPQ + MbQ->DFAlphaC0Offset);
+ int indexBQ = iClip3(0, MAX_QP, QPQ + MbQ->DFBetaOffset);
+ const byte *ClipTabQ = CLIP_TAB[indexAQ], *ClipTabQ_Chroma[2], *ClipTabP_Chroma[2];
+
+ // return, if filter is disabled
+ if (MbQ->DFDisableIdc==1)
+ {
+ p_Vid->DeblockCall = 0;
+ return;
+ }
+
+ alphas[1] = ALPHA_TABLE[indexAQ]; // luma parameters from the current MB's own QP
+ betas[1] = BETA_TABLE [indexBQ];
+
+ indexAQ = iClip3(0, MAX_QP, MbQ->qpc[0] + MbQ->DFAlphaC0Offset); // same for both chroma planes, from qpc[0] / qpc[1]
+ alphas_chroma[0][1] = ALPHA_TABLE[indexAQ];
+ ClipTabQ_Chroma[0] = CLIP_TAB [indexAQ];
+ indexAQ = iClip3(0, MAX_QP, MbQ->qpc[1] + MbQ->DFAlphaC0Offset);
+ alphas_chroma[1][1] = ALPHA_TABLE[indexAQ];
+ ClipTabQ_Chroma[1] = CLIP_TAB [indexAQ];
+
+ indexBQ = iClip3(0, MAX_QP, MbQ->qpc[0] + MbQ->DFBetaOffset);
+ betas_chroma[0][1] = BETA_TABLE[indexBQ];
+ indexBQ = iClip3(0, MAX_QP, MbQ->qpc[1] + MbQ->DFBetaOffset);
+ betas_chroma[1][1] = BETA_TABLE[indexBQ];
+
+ p_Vid->DeblockCall = 1;
+ get_mb_block_pos_normal(p_Vid->PicPos, MbQAddr, &mb_x, &mb_y);
+
+ filterLeftMbEdgeFlag = (mb_x != 0);
+ filterTopMbEdgeFlag = (mb_y != 0);
+ YQ = imgY->base_address + mb_y*16 * imgY->stride + mb_x*16; // top-left luma sample of this MB (16x16)
+ UQ = imgUV[0]->base_address + mb_y * 8 * imgUV[0]->stride + mb_x * 8; // top-left chroma samples (8x8 per MB)
+ VQ = imgUV[1]->base_address + mb_y * 8 * imgUV[1]->stride + mb_x * 8;
+
+ if (p->structure!=FRAME)
+ mvlimit = 2;
+
+ if (MbQ->DFDisableIdc==2)
+ {
+ // don't filter at slice boundaries
+ filterLeftMbEdgeFlag = MbQ->mb_avail_left;
+ // the top edge is likewise filtered only when mb_avail_up is set
+ filterTopMbEdgeFlag = MbQ->mb_avail_up;
+ }
+
+ //CheckAvailabilityOfNeighbors(MbQ);
+
+#pragma region vertical
+ if(filterLeftMbEdgeFlag) // can not filter beyond picture boundaries
+ {
+ MbP = &(p_Vid->mb_data[MbQ->mb_addr_left]);
+ }
+ else
+ MbP=0;
+
+ GetStrength_Vert_YUV420_All(strength.edge, MbQ, mvlimit, p, mb_x*4, mb_y*4, MbP, MbQ->luma_transform_size_8x8_flag);
+
+ {
+ int i;
+ if (MbP)
+ {
+ int QP_Chroma0 = (MbP->qpc[0] + MbQ->qpc[0] + 1) >> 1; // rounded average of both MBs' QPs
+ int QP_Chroma1 = (MbP->qpc[1] + MbQ->qpc[1] + 1) >> 1;
+ int QP = (MbP->qp + QPQ + 1) >> 1;
+
+ int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+ int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+ const byte *ClipTab = CLIP_TAB[indexA];
+
+ alphas[0] = ALPHA_TABLE[indexA];
+ betas[0] = BETA_TABLE [indexB];
+ for (i=0;i<4;i++) // first four entries use the averaged-QP clip table
+ {
+ thresholds.as8[i] = ClipTab[strength.as8[i]];
+ }
+
+ indexA = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFAlphaC0Offset);
+ alphas_chroma[0][0] = ALPHA_TABLE[indexA];
+ ClipTabP_Chroma[0] = CLIP_TAB[indexA];
+ indexB = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFBetaOffset);
+ betas_chroma[0][0] =BETA_TABLE[indexB];
+
+ indexA = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFAlphaC0Offset);
+ alphas_chroma[1][0] = ALPHA_TABLE[indexA];
+ ClipTabP_Chroma[1] = CLIP_TAB[indexA];
+ indexB = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFBetaOffset);
+ betas_chroma[1][0] = BETA_TABLE[indexB];
+ }
+
+ for (i=4;i<16;i++) // remaining entries use the current MB's clip table
+ {
+ thresholds.as8[i] = ClipTabQ[strength.as8[i]];
+ }
+
+ ippiFilterDeblockingLuma_VerEdge_H264_8u_C1IR(YQ, imgY->stride, alphas, betas, thresholds.as8, strength.as8);
+
+ if (MbP)
+ {
+ for (i=0;i<4;i++)
+ {
+ thresholds.as8[i] = ClipTabP_Chroma[0][strength.as8[i]];
+ }
+ }
+ for (i=4;i<8;i++) // chroma thresholds 4..7 are taken from strength bytes 8..11
+ {
+ thresholds.as8[i] = ClipTabQ_Chroma[0][strength.as8[i+4]];
+ }
+ ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR(UQ, imgUV[0]->stride, alphas_chroma[0], betas_chroma[0], thresholds.as8, strength.as8);
+
+ if (MbP)
+ {
+ for (i=0;i<4;i++)
+ {
+ thresholds.as8[i] = ClipTabP_Chroma[1][strength.as8[i]];
+ }
+ }
+ for (i=4;i<8;i++) // chroma thresholds 4..7 are taken from strength bytes 8..11
+ {
+ thresholds.as8[i] = ClipTabQ_Chroma[1][strength.as8[i+4]];
+ }
+ ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR(VQ, imgUV[1]->stride, alphas_chroma[1], betas_chroma[1], thresholds.as8, strength.as8);
+
+ }
+
+#pragma endregion
+
+
+#pragma region horizontal
+ MbP = 0;
+ /* ---- horizontal ---- */
+ // edge=0;
+ if(filterTopMbEdgeFlag) // can not filter beyond picture boundaries
+ {
+ MbP = &(p_Vid->mb_data[MbQ->mb_addr_up]);
+ }
+ else
+ {
+ MbP = 0;
+ }
+
+ GetStrength_Horiz_YUV420_All(strength.edge, MbQ, mvlimit, p, mb_x*4, mb_y*4, MbP, MbQ->luma_transform_size_8x8_flag);
+
+ {
+ int i;
+
+ if (MbP)
+ {
+ int QP_Chroma0 = (MbP->qpc[0] + MbQ->qpc[0] + 1) >> 1; // rounded average of both MBs' QPs
+ int QP_Chroma1 = (MbP->qpc[1] + MbQ->qpc[1] + 1) >> 1;
+ int QP = (MbP->qp + QPQ + 1) >> 1;
+
+ int indexA = iClip3(0, MAX_QP, QP + MbQ->DFAlphaC0Offset);
+ int indexB = iClip3(0, MAX_QP, QP + MbQ->DFBetaOffset);
+ const byte *ClipTab = CLIP_TAB[indexA];
+
+ alphas[0] = ALPHA_TABLE[indexA];
+ betas[0] = BETA_TABLE [indexB];
+ for (i=0;i<4;i++) // first four entries use the averaged-QP clip table
+ {
+ thresholds.as8[i] = ClipTab[strength.as8[i]];
+ }
+
+ indexA = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFAlphaC0Offset);
+ alphas_chroma[0][0] = ALPHA_TABLE[indexA];
+ ClipTabP_Chroma[0] = CLIP_TAB[indexA];
+ indexB = iClip3(0, MAX_QP, QP_Chroma0 + MbQ->DFBetaOffset);
+ betas_chroma[0][0] =BETA_TABLE[indexB];
+
+ indexA = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFAlphaC0Offset);
+ alphas_chroma[1][0] = ALPHA_TABLE[indexA];
+ ClipTabP_Chroma[1] = CLIP_TAB[indexA];
+ indexB = iClip3(0, MAX_QP, QP_Chroma1 + MbQ->DFBetaOffset);
+ betas_chroma[1][0] = BETA_TABLE[indexB];
+ }
+
+ for (i=4;i<16;i++) // remaining entries use the current MB's clip table
+ {
+ thresholds.as8[i] = ClipTabQ[strength.as8[i]];
+ }
+
+ ippiFilterDeblockingLuma_HorEdge_H264_8u_C1IR(YQ, imgY->stride, alphas, betas, thresholds.as8, strength.as8);
+
+ if (MbP)
+ {
+ for (i=0;i<4;i++)
+ {
+ thresholds.as8[i] = ClipTabP_Chroma[0][strength.as8[i]];
+ }
+ }
+ for (i=4;i<8;i++) // chroma thresholds 4..7 are taken from strength bytes 8..11
+ {
+ thresholds.as8[i] = ClipTabQ_Chroma[0][strength.as8[i+4]];
+ }
+ ippiFilterDeblockingChroma_HorEdge_H264_8u_C1IR(UQ, imgUV[0]->stride, alphas_chroma[0], betas_chroma[0], thresholds.as8, strength.as8);
+
+ if (MbP)
+ {
+ for (i=0;i<4;i++)
+ {
+ thresholds.as8[i] = ClipTabP_Chroma[1][strength.as8[i]];
+ }
+ }
+ for (i=4;i<8;i++) // chroma thresholds 4..7 are taken from strength bytes 8..11
+ {
+ thresholds.as8[i] = ClipTabQ_Chroma[1][strength.as8[i+4]];
+ }
+ ippiFilterDeblockingChroma_HorEdge_H264_8u_C1IR(VQ, imgUV[1]->stride, alphas_chroma[1], betas_chroma[1], thresholds.as8, strength.as8);
+
+ }
+#pragma endregion
+
+ p_Vid->DeblockCall = 0;
+}
+
+
+#define ANY_INTRA (MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM) // non-zero when MbP or MbQ has type I4MB, I8MB, I16MB or IPCM; both pointers must be in scope (and non-NULL) where the macro is expanded
+
+/*!
+*********************************************************************************************
+* \brief
+* Fills a buffer with the 16 Strength values of one stripe in a mb (Frame or Field types).
+* dir == 0 forwards to the vertical-edge routine, any other value to the horizontal one.
+*********************************************************************************************
+*/
+void GetStrengthNormal_Horiz(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p);
+void GetStrengthNormal_Vert(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p);
+
+static void GetStrengthNormal(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p)
+{
+  if (dir != 0)
+  {
+    // horizontal edges
+    GetStrengthNormal_Horiz(Strength, MbQ, edge, mvlimit, p);
+    return;
+  }
+
+  // vertical edges
+  GetStrengthNormal_Vert(Strength, MbQ, edge, mvlimit, p);
+}
+
+/*!
+*********************************************************************************************
+* \brief
+* Computes the 16 Strength values of one horizontal edge stripe in a mb (for MBAFF)
+*
+* \param Strength
+* output buffer, one value per luma column along the edge
+* \param MbQ
+* current macroblock
+* \param dir
+* not used by this routine (it only handles the horizontal direction, dir == 1)
+* \param edge
+* luma row of the edge inside MbQ; values >= MB_BLOCK_SIZE select row 1
+* \param mvlimit
+* threshold for differences of the mv[1] component (mv[0] always uses 4)
+* \param p
+* picture supplying slice type, structure, MBAFF flag and motion data
+*
+* \note
+* p_Vid->mixedModeEdgeFlag is rewritten for every column and keeps the value
+* computed for the last one.
+*********************************************************************************************
+*/
+static void GetStrengthMBAff_Horiz(byte Strength[16], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p)
+{
+  // dir == 1
+  short blkP, blkQ, idx;
+  short blk_x, blk_x2, blk_y, blk_y2;
+  h264_ref_t ref_p0, ref_p1, ref_q0, ref_q1;
+  int xQ, yQ;
+  short mb_x, mb_y;
+  Macroblock *MbP;
+  PixelPos pixP;
+
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  PicMotionParams *motion = &p->motion;
+  PicMotion **motion0 = motion->motion[LIST_0];
+  PicMotion **motion1 = motion->motion[LIST_1];
+  PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+
+  yQ = (edge < MB_BLOCK_SIZE ? edge : 1);
+
+  for (idx = 0; idx < 16; ++idx)
+  {
+    xQ = idx;
+
+    p_Vid->getNeighbourLuma(MbQ, xQ, yQ - 1, &pixP);
+    blkQ = (short) ((yQ & 0xFFFC) + (xQ >> 2));
+    blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2));
+
+    MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+    p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field);
+
+    // Start with Strength=3, or Strength=4 for an Mb-edge between frame coded Mbs
+    Strength[idx] = (edge == 0 && (((!p->mb_aff_frame_flag && (p->structure==FRAME)) ||
+      (p->mb_aff_frame_flag && !MbP->mb_field && !MbQ->mb_field)))) ? 4 : 3;
+
+    // SP and SI slices keep the starting value
+    if ((p->slice_type==SP_SLICE) || (p->slice_type==SI_SLICE))
+      continue;
+
+    // so does every edge with an intra coded Mb on either side
+    if ((MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM)
+      || (MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM))
+      continue;
+
+    // coded coefficients in one of the two 4x4 blocks
+    if (((MbQ->cbp_blk[0] & ((int64)1 << blkQ)) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0))
+    {
+      Strength[idx] = 2;
+      continue;
+    }
+
+    // if this is a mixed mode edge then one set of reference pictures will be frame and the
+    // other will be field
+    if (p_Vid->mixedModeEdgeFlag)
+    {
+      Strength[idx] = 1;
+      continue;
+    }
+
+    // no coefs: Strength depends on reference pictures and vector differences
+    p_Vid->get_mb_block_pos(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+    blk_y  = (short) ((mb_y << 2) + (blkQ >> 2));
+    blk_x  = (short) ((mb_x << 2) + (blkQ & 3));
+    blk_y2 = (short) (pixP.pos_y >> 2);
+    blk_x2 = (short) (pixP.pos_x >> 2);
+
+    // (blk_y, blk_x) is the block inside MbQ, (blk_y2, blk_x2) the neighbouring block
+    motion_q0 = &motion0[blk_y ][blk_x ];
+    motion_p0 = &motion0[blk_y2][blk_x2];
+    motion_q1 = &motion1[blk_y ][blk_x ];
+    motion_p1 = &motion1[blk_y2][blk_x2];
+
+    ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+    ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+    ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+    ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+
+    if (!(((ref_p0==ref_q0) && (ref_p1==ref_q1)) ||
+          ((ref_p0==ref_q1) && (ref_p1==ref_q0))))
+    {
+      // the two blocks do not use the same pair of reference pictures
+      Strength[idx] = 1;
+      continue;
+    }
+
+    if (ref_p0 != ref_p1)
+    {
+      // L0 and L1 reference pictures of p0 are different; q0 as well
+      // compare MV for the same reference picture
+      if (ref_p0==ref_q0)
+      {
+        Strength[idx] = (byte) (
+          (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+          (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+          (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+          (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+      }
+      else
+      {
+        Strength[idx] = (byte) (
+          (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+          (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+          (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+          (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+      }
+    }
+    else
+    {
+      // L0 and L1 reference pictures of p0 are the same; q0 as well
+      Strength[idx] = (byte) (
+        ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+         (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+         (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+         (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+        &&
+        ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+         (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+         (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+         (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+    }
+  }
+}
+
+/*!
+*********************************************************************************************
+* \brief
+* Computes the 16 Strength values of one vertical edge stripe in a mb (for MBAFF)
+*
+* \param Strength
+* output buffer, one value per luma row along the edge
+* \param MbQ
+* current macroblock
+* \param dir
+* not used by this routine (it only handles the vertical direction, dir == 0)
+* \param edge
+* luma column of the edge inside MbQ
+* \param mvlimit
+* threshold for differences of the mv[1] component (mv[0] always uses 4)
+* \param p
+* picture supplying slice type and motion data
+*
+* \note
+* p_Vid->mixedModeEdgeFlag is rewritten for every row and keeps the value
+* computed for the last one.
+*********************************************************************************************
+*/
+static void GetStrengthMBAff_Vert(byte Strength[16], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p)
+{
+  // dir == 0
+  short blkP, blkQ, idx;
+  short blk_x, blk_x2, blk_y, blk_y2;
+  h264_ref_t ref_p0, ref_p1, ref_q0, ref_q1;
+  int xQ, yQ;
+  short mb_x, mb_y;
+  Macroblock *MbP;
+  PixelPos pixP;
+
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  PicMotionParams *motion = &p->motion;
+  PicMotion **motion0 = motion->motion[LIST_0];
+  PicMotion **motion1 = motion->motion[LIST_1];
+  PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+
+  // Start value: Strength=4 on the Mb-edge, Strength=3 on inner edges.
+  // The former test was  edge == 0 && ((A || B) || C)  with
+  //   A = !mb_aff_frame_flag && structure == FRAME
+  //   C =  mb_aff_frame_flag || structure != FRAME
+  // C is the complement of A, so the bracket was always true.
+  const byte startStrength = (byte) ((edge == 0) ? 4 : 3);
+
+  xQ = edge;
+
+  for (idx = 0; idx < 16; ++idx)
+  {
+    yQ = idx;
+
+    p_Vid->getNeighbourLuma(MbQ, xQ - 1, yQ, &pixP);
+    blkQ = (short) ((yQ & 0xFFFC) + (xQ >> 2));
+    blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2));
+
+    MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+    p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field);
+
+    Strength[idx] = startStrength;
+
+    // SP and SI slices keep the starting value
+    if ((p->slice_type==SP_SLICE) || (p->slice_type==SI_SLICE))
+      continue;
+
+    // so does every edge with an intra coded Mb on either side
+    if ((MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM)
+      || (MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM))
+      continue;
+
+    // coded coefficients in one of the two 4x4 blocks
+    if (((MbQ->cbp_blk[0] & ((int64)1 << blkQ)) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0))
+    {
+      Strength[idx] = 2;
+      continue;
+    }
+
+    // if this is a mixed mode edge then one set of reference pictures will be frame and the
+    // other will be field
+    if (p_Vid->mixedModeEdgeFlag)
+    {
+      Strength[idx] = 1;
+      continue;
+    }
+
+    // no coefs: Strength depends on reference pictures and vector differences
+    p_Vid->get_mb_block_pos(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+    blk_y  = (short) ((mb_y << 2) + (blkQ >> 2));
+    blk_x  = (short) ((mb_x << 2) + (blkQ & 3));
+    blk_y2 = (short) (pixP.pos_y >> 2);
+    blk_x2 = (short) (pixP.pos_x >> 2);
+
+    // (blk_y, blk_x) is the block inside MbQ, (blk_y2, blk_x2) the neighbouring block
+    motion_q0 = &motion0[blk_y ][blk_x ];
+    motion_p0 = &motion0[blk_y2][blk_x2];
+    motion_q1 = &motion1[blk_y ][blk_x ];
+    motion_p1 = &motion1[blk_y2][blk_x2];
+
+    ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+    ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+    ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+    ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+
+    if (!(((ref_p0==ref_q0) && (ref_p1==ref_q1)) ||
+          ((ref_p0==ref_q1) && (ref_p1==ref_q0))))
+    {
+      // the two blocks do not use the same pair of reference pictures
+      Strength[idx] = 1;
+      continue;
+    }
+
+    if (ref_p0 != ref_p1)
+    {
+      // L0 and L1 reference pictures of p0 are different; q0 as well
+      // compare MV for the same reference picture
+      if (ref_p0==ref_q0)
+      {
+        Strength[idx] = (byte) (
+          (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+          (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+          (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+          (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+      }
+      else
+      {
+        Strength[idx] = (byte) (
+          (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+          (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+          (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+          (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+      }
+    }
+    else
+    {
+      // L0 and L1 reference pictures of p0 are the same; q0 as well
+      Strength[idx] = (byte) (
+        ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+         (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+         (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+         (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+        &&
+        ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+         (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+         (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+         (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+    }
+  }
+}
+
+/*!
+*********************************************************************************************
+* \brief
+* Direction dispatcher for the MBAFF Strength computation: dir == 0 forwards to the
+* vertical-edge routine, any other value to the horizontal-edge routine.
+*********************************************************************************************
+*/
+static void GetStrengthMBAff(byte Strength[16], Macroblock *MbQ, int dir, int edge, int mvlimit, StorablePicture *p)
+{
+  if (dir != 0)
+  {
+    GetStrengthMBAff_Horiz(Strength, MbQ, dir, edge, mvlimit, p);
+    return;
+  }
+
+  GetStrengthMBAff_Vert(Strength, MbQ, dir, edge, mvlimit, p);
+}
+
+/*!
+*****************************************************************************************
+* \brief
+* Filters 16 pel block edge of Frame or Field coded MBs.
+* dir == 0 uses the vertical-edge routine; otherwise the horizontal-edge routine is
+* used, in its SSE2 variant when sse2_flag is set.
+*****************************************************************************************
+*/
+
+
+static void EdgeLoopLumaNormal(ColorPlane pl, VideoImage *image, const byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ,
+  int dir, int edge, StorablePicture *p)
+{
+  if (dir == 0)
+  {
+    EdgeLoopLumaNormal_Vert(pl, image, Strength, MbQ, edge, p);
+    return;
+  }
+
+  // horizontal edge: pick the implementation
+  if (sse2_flag)
+  {
+    EdgeLoopLumaNormal_Horiz_sse2(pl, image, Strength, MbQ, edge, p);
+  }
+  else
+  {
+    EdgeLoopLumaNormal_Horiz(pl, image, Strength, MbQ, edge, p);
+  }
+}
+
+/*!
+*****************************************************************************************
+* \brief
+* Filters 16 pel block edge of Super MB Frame coded MBs (horizontal edge, dir == 1)
+*
+* \param pl
+* colour plane; a non-zero value switches to the chroma pel count, the chroma bit depth
+* scale and the qpc[pl-1] quantiser values
+* \param image
+* plane that is filtered in place
+* \param Strength
+* one strength value per pel along the edge; 0 leaves the pel untouched, 4 selects the
+* strong filter
+* \param MbQ
+* current macroblock
+* \param edge
+* luma row of the edge inside MbQ; values >= 16 select row 1
+* \param p
+* picture, only consulted for chroma_format_idc
+*****************************************************************************************
+*/
+static void EdgeLoopLumaMBAff_Horiz(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p)
+{
+ // dir == 1
+ imgpel **Img = image->img;
+ int width = image->stride;
+ int pel, ap = 0, aq = 0, Strng ;
+ int incP, incQ;
+ int C0, tc0, dif;
+ imgpel L0, R0;
+ int Alpha = 0, Beta = 0 ;
+ const byte* ClipTab = NULL;
+ int small_gap;
+ int indexA, indexB;
+ int PelNum = pl? pelnum_cr[1][p->chroma_format_idc] : MB_BLOCK_SIZE;
+
+ int QP;
+ int xQ, yQ;
+
+ PixelPos pixP, pixQ;
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ int bitdepth_scale = pl? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA];
+ int max_imgpel_value = p_Vid->max_pel_value_comp[pl];
+
+ int AlphaC0Offset = MbQ->DFAlphaC0Offset;
+ int BetaOffset = MbQ->DFBetaOffset;
+ byte fieldModeFilteringFlag;
+
+ Macroblock *MbP;
+ imgpel *SrcPtrP, *SrcPtrQ;
+
+ for( pel = 0 ; pel < PelNum ; ++pel )
+ {
+ xQ = pel ;
+ yQ = (edge < 16 ? edge : 1) ;
+ p_Vid->getNeighbourLuma(MbQ, xQ, yQ - 1, &pixP); // P is the sample directly above Q
+
+ if (pixP.available || (MbQ->DFDisableIdc== 0))
+ {
+ if( (Strng = Strength[pel]) != 0)
+ {
+ p_Vid->getNeighbourLuma(MbQ, xQ, yQ, &pixQ);
+
+ MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+ fieldModeFilteringFlag = (byte) (MbQ->mb_field || MbP->mb_field);
+
+ incQ = ((fieldModeFilteringFlag && !MbQ->mb_field) ? 2 * width : width); // a frame MB next to a field MB is stepped two rows at a time
+ incP = ((fieldModeFilteringFlag && !MbP->mb_field) ? 2 * width : width);
+ SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+ SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+
+ // Average QP of the two blocks
+ QP = pl? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1;
+
+ indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset);
+ indexB = iClip3(0, MAX_QP, QP + BetaOffset);
+
+ Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+ Beta = BETA_TABLE [indexB] * bitdepth_scale;
+ ClipTab = CLIP_TAB[indexA];
+
+
+ L0 = SrcPtrP[0] ; // L* are samples on the P side, R* on the Q side, index grows away from the edge
+ R0 = SrcPtrQ[0] ;
+
+
+
+ if( abs( R0 - L0 ) < Alpha )
+ {
+ imgpel L1 = SrcPtrP[-incP];
+ imgpel R1 = SrcPtrQ[ incQ];
+ if ((abs( R0 - R1) < Beta ) && (abs(L0 - L1) < Beta ))
+ {
+ imgpel L2 = SrcPtrP[-incP*2];
+ imgpel R2 = SrcPtrQ[ incQ*2];
+ if(Strng == 4 ) // INTRA strong filtering
+ {
+ int RL0 = L0 + R0;
+ small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2));
+ aq = ( abs( R0 - R2) < Beta ) & small_gap;
+ ap = ( abs( L0 - L2) < Beta ) & small_gap;
+
+ if (ap) // rewrite three samples on the P side
+ {
+ imgpel L3 = SrcPtrP[-incP*3];
+ SrcPtrP[-incP * 2] = (imgpel) ((((L3 + L2) << 1) + L2 + L1 + RL0 + 4) >> 3);
+ SrcPtrP[-incP ] = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2);
+ SrcPtrP[ 0 ] = (imgpel) (( R1 + ((L1 + RL0) << 1) + L2 + 4) >> 3);
+ }
+ else // only the sample next to the edge
+ {
+ SrcPtrP[ 0 ] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2) ;
+ }
+
+ if (aq) // rewrite three samples on the Q side
+ {
+ imgpel R3 = SrcPtrQ[ incQ*3];
+ SrcPtrQ[ 0 ] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3);
+ SrcPtrQ[ incQ ] = (imgpel) (( R2 + R0 + R1 + L0 + 2) >> 2);
+ SrcPtrQ[ incQ * 2 ] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + RL0 + 4) >> 3);
+ }
+ else // only the sample next to the edge
+ {
+ SrcPtrQ[ 0 ] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2);
+ }
+ }
+ else // normal filtering
+ {
+ int RL0 = (L0 + R0 + 1) >> 1;
+ aq = (abs( R0 - R2) < Beta);
+ ap = (abs( L0 - L2) < Beta);
+
+ C0 = ClipTab[ Strng ] * bitdepth_scale;
+ tc0 = (C0 + ap + aq) ; // clipping range widens by one for each side that passes its Beta test
+ dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3) ;
+
+ if( ap )
+ *(SrcPtrP - incP) += iClip3( -C0, C0, ( L2 + RL0 - (L1 << 1)) >> 1 ) ;
+
+ *SrcPtrP = (imgpel) iClip1 (max_imgpel_value, L0 + dif) ;
+ *SrcPtrQ = (imgpel) iClip1 (max_imgpel_value, R0 - dif) ;
+
+ if( aq )
+ *(SrcPtrQ + incQ) += iClip3( -C0, C0, ( R2 + RL0 - (R1 << 1)) >> 1 ) ;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/*!
+*****************************************************************************************
+* \brief
+* Filters 16 pel block edge of Super MB Frame coded MBs (vertical edge, dir == 0)
+*
+* \param pl
+* colour plane; a non-zero value switches to the chroma pel count, the chroma bit depth
+* scale and the qpc[pl-1] quantiser values
+* \param image
+* plane that is filtered in place
+* \param Strength
+* one strength value per pel along the edge; 0 leaves the pel untouched, 4 selects the
+* strong filter
+* \param MbQ
+* current macroblock
+* \param edge
+* luma column of the edge inside MbQ
+* \param p
+* picture, only consulted for chroma_format_idc
+*****************************************************************************************
+*/
+static void EdgeLoopLumaMBAff_Vert(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int edge, StorablePicture *p)
+{
+  // dir == 0
+  imgpel **Img = image->img;
+  int pel, ap = 0, aq = 0, Strng;
+
+  int C0, tc0, dif;
+  imgpel L0, R0;
+  int Alpha = 0, Beta = 0;
+  const byte *ClipTab = NULL;
+  int small_gap;
+  int indexA, indexB;
+  int PelNum = pl ? pelnum_cr[0][p->chroma_format_idc] : MB_BLOCK_SIZE;
+
+  int QP;
+  int xQ, yQ;
+
+  PixelPos pixP, pixQ;
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  int bitdepth_scale = pl ? p_Vid->bitdepth_scale[IS_CHROMA] : p_Vid->bitdepth_scale[IS_LUMA];
+  int max_imgpel_value = p_Vid->max_pel_value_comp[pl];
+
+  int AlphaC0Offset = MbQ->DFAlphaC0Offset;
+  int BetaOffset = MbQ->DFBetaOffset;
+
+  Macroblock *MbP;
+  imgpel *SrcPtrP, *SrcPtrQ;
+
+  for (pel = 0; pel < PelNum; ++pel)
+  {
+    xQ = edge;
+    yQ = pel;
+    // P is the sample directly left of Q
+    p_Vid->getNeighbourXPLuma(MbQ, xQ - 1, yQ, &pixP);
+
+    if (pixP.available || (MbQ->DFDisableIdc == 0))
+    {
+      if ((Strng = Strength[pel]) != 0)
+      {
+        p_Vid->getNeighbourLuma(MbQ, xQ, yQ, &pixQ);
+
+        MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+
+        // neighbouring samples lie in the same row, so the step is always 1
+        SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+        SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+
+        // Average QP of the two blocks
+        QP = pl ? ((MbP->qpc[pl-1] + MbQ->qpc[pl-1] + 1) >> 1) : (MbP->qp + MbQ->qp + 1) >> 1;
+
+        indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset);
+        indexB = iClip3(0, MAX_QP, QP + BetaOffset);
+
+        Alpha = ALPHA_TABLE[indexA] * bitdepth_scale;
+        Beta = BETA_TABLE [indexB] * bitdepth_scale;
+        ClipTab = CLIP_TAB[indexA];
+
+        // L* are samples on the P side, R* on the Q side, index grows away from the edge
+        L0 = SrcPtrP[0];
+        R0 = SrcPtrQ[0];
+
+        if (abs( R0 - L0 ) < Alpha)
+        {
+          imgpel L1 = SrcPtrP[-1];
+          imgpel R1 = SrcPtrQ[ 1];
+          if ((abs( R0 - R1) < Beta ) && (abs(L0 - L1) < Beta ))
+          {
+            imgpel L2 = SrcPtrP[-2];
+            imgpel R2 = SrcPtrQ[ 2];
+            if (Strng == 4) // INTRA strong filtering
+            {
+              int RL0 = L0 + R0;
+              small_gap = (abs( R0 - L0 ) < ((Alpha >> 2) + 2));
+              aq = ( abs( R0 - R2) < Beta ) & small_gap;
+              ap = ( abs( L0 - L2) < Beta ) & small_gap;
+
+              if (ap)
+              {
+                // rewrite three samples on the P side
+                imgpel L3 = SrcPtrP[-3];
+                SrcPtrP[-2] = (imgpel) ((((L3 + L2) << 1) + L2 + L1 + RL0 + 4) >> 3);
+                SrcPtrP[-1] = (imgpel) (( L2 + L1 + L0 + R0 + 2) >> 2);
+                SrcPtrP[ 0] = (imgpel) (( R1 + ((L1 + RL0) << 1) + L2 + 4) >> 3);
+              }
+              else
+              {
+                // only the sample next to the edge
+                SrcPtrP[ 0] = (imgpel) (((L1 << 1) + L0 + R1 + 2) >> 2);
+              }
+
+              if (aq)
+              {
+                // rewrite three samples on the Q side
+                imgpel R3 = SrcPtrQ[ 3];
+                SrcPtrQ[ 0] = (imgpel) (( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3);
+                SrcPtrQ[ 1] = (imgpel) (( R2 + R0 + R1 + L0 + 2) >> 2);
+                SrcPtrQ[ 2] = (imgpel) ((((R3 + R2) << 1) + R2 + R1 + RL0 + 4) >> 3);
+              }
+              else
+              {
+                // only the sample next to the edge
+                SrcPtrQ[ 0] = (imgpel) (((R1 << 1) + R0 + L1 + 2) >> 2);
+              }
+            }
+            else // normal filtering
+            {
+              int RL0 = (L0 + R0 + 1) >> 1;
+              aq = (abs( R0 - R2) < Beta);
+              ap = (abs( L0 - L2) < Beta);
+
+              C0 = ClipTab[ Strng ] * bitdepth_scale;
+              // clipping range widens by one for each side that passes its Beta test
+              tc0 = (C0 + ap + aq);
+              dif = iClip3( -tc0, tc0, (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3);
+
+              if (ap)
+                *(SrcPtrP - 1) += iClip3( -C0, C0, ( L2 + RL0 - (L1 << 1)) >> 1 );
+
+              *SrcPtrP = (imgpel) iClip1 (max_imgpel_value, L0 + dif);
+              *SrcPtrQ = (imgpel) iClip1 (max_imgpel_value, R0 - dif);
+
+              if (aq)
+                *(SrcPtrQ + 1) += iClip3( -C0, C0, ( R2 + RL0 - (R1 << 1)) >> 1 );
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/*!
+*****************************************************************************************
+* \brief
+* Direction dispatcher for the MBAFF luma edge filter: dir == 0 forwards to the
+* vertical-edge routine, any other value to the horizontal-edge routine.
+*****************************************************************************************
+*/
+static void EdgeLoopLumaMBAff(ColorPlane pl, VideoImage *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, StorablePicture *p)
+{
+  if (dir != 0)
+  {
+    EdgeLoopLumaMBAff_Horiz(pl, image, Strength, MbQ, edge, p);
+    return;
+  }
+
+  EdgeLoopLumaMBAff_Vert(pl, image, Strength, MbQ, edge, p);
+}
+
+/*!
+*****************************************************************************************
+* \brief
+* Filters chroma block edge for Frame or Field coded pictures.
+* dir == 0 forwards to the vertical-edge routine, any other value to the
+* horizontal-edge routine.
+*****************************************************************************************
+*/
+
+
+static void EdgeLoopChromaNormal(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p)
+{
+  if (dir != 0)
+  {
+    EdgeLoopChromaNormal_Horiz(image, Strength, MbQ, edge, uv, p);
+    return;
+  }
+
+  EdgeLoopChromaNormal_Vert(image, Strength, MbQ, edge, uv, p);
+}
+/*!
+*****************************************************************************************
+* \brief
+* Filters chroma block edge for MBAFF types
+*
+* \param image
+* chroma plane that is filtered in place
+* \param Strength
+* strength values of the edge; 0 leaves a pel untouched, 4 selects the strong filter
+* \param MbQ
+* current macroblock
+* \param dir
+* 0 for a vertical edge, 1 for a horizontal edge
+* \param edge
+* position of the edge inside MbQ; for dir == 1 values >= 16 select row 1
+* \param uv
+* chroma component index, used for qpc[uv] and max_pel_value_comp[uv + 1]
+* \param p
+* picture, only consulted for chroma_format_idc
+*****************************************************************************************
+*/
+static void EdgeLoopChromaMBAff(VideoImage *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, StorablePicture *p)
+{
+  VideoParameters *p_Vid = MbQ->p_Vid;
+  imgpel **plane = image->img;
+  int stride = image->stride;
+  int numPels = pelnum_cr[dir][p->chroma_format_idc];
+  int stepX = 1 - dir;
+  int scale = p_Vid->bitdepth_scale[IS_CHROMA];
+  int maxPel = p_Vid->max_pel_value_comp[uv + 1];
+  int alphaOffset = MbQ->DFAlphaC0Offset;
+  int betaOffset = MbQ->DFBetaOffset;
+  int pel;
+
+  for (pel = 0; pel < numPels; ++pel)
+  {
+    PixelPos posP, posQ;
+    Macroblock *MbP;
+    imgpel *ptrP, *ptrQ;
+    imgpel L0, R0, L1, R1;
+    const byte *clip;
+    byte mixedField;
+    int xQ, yQ;
+    int bsIdx, bs;
+    int incP, incQ;
+    int qp, idxA, idxB, alpha, beta;
+    int c0, tc0, dif;
+
+    // position of the Q sample inside the macroblock
+    if (dir)
+    {
+      xQ = pel;
+      yQ = (edge < 16) ? edge : 1;
+    }
+    else
+    {
+      xQ = edge;
+      yQ = pel;
+    }
+
+    p_Vid->getNeighbour(MbQ, xQ, yQ, p_Vid->mb_size[IS_CHROMA], &posQ);
+    p_Vid->getNeighbour(MbQ, xQ - stepX, yQ - dir, p_Vid->mb_size[IS_CHROMA], &posP);
+    MbP = &(p_Vid->mb_data[posP.mb_addr]);
+
+    // map the chroma pel to its entry in the 16-entry Strength buffer
+    if (numPels != 8)
+      bsIdx = pel;
+    else if (MbQ->mb_field && !MbP->mb_field)
+      bsIdx = pel << 1;
+    else
+      bsIdx = ((pel >> 1) << 2) + (pel & 0x01);
+
+    if (!posP.available && (MbQ->DFDisableIdc != 0))
+      continue;
+
+    bs = Strength[bsIdx];
+    if (bs == 0)
+      continue;
+
+    mixedField = (byte) (MbQ->mb_field || MbP->mb_field);
+    if (dir)
+    {
+      // a frame MB next to a field MB is stepped two rows at a time
+      incQ = (mixedField && !MbQ->mb_field) ? 2 * stride : stride;
+      incP = (mixedField && !MbP->mb_field) ? 2 * stride : stride;
+    }
+    else
+    {
+      incQ = 1;
+      incP = 1;
+    }
+    ptrQ = &(plane[posQ.pos_y][posQ.pos_x]);
+    ptrP = &(plane[posP.pos_y][posP.pos_x]);
+
+    // Average QP of the two blocks
+    qp = (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1;
+
+    idxA = iClip3(0, MAX_QP, qp + alphaOffset);
+    idxB = iClip3(0, MAX_QP, qp + betaOffset);
+
+    alpha = ALPHA_TABLE[idxA] * scale;
+    beta = BETA_TABLE [idxB] * scale;
+    clip = CLIP_TAB[idxA];
+
+    L0 = ptrP[0];
+    R0 = ptrQ[0];
+
+    if (abs( R0 - L0 ) >= alpha)
+      continue;
+
+    L1 = ptrP[-incP];
+    R1 = ptrQ[ incQ];
+
+    if (!((abs( R0 - R1) - beta < 0) && (abs(L0 - L1) - beta < 0)))
+      continue;
+
+    if (bs == 4)
+    {
+      // INTRA strong filtering
+      ptrQ[0] = (imgpel) ( ((R1 << 1) + R0 + L1 + 2) >> 2 );
+      ptrP[0] = (imgpel) ( ((L1 << 1) + L0 + R1 + 2) >> 2 );
+    }
+    else
+    {
+      c0 = clip[ bs ] * scale;
+      tc0 = (c0 + 1);
+      dif = iClip3( -tc0, tc0, ( ((R0 - L0) << 2) + (L1 - R1) + 4) >> 3 );
+
+      ptrP[0] = (imgpel) iClip1 ( maxPel, L0 + dif );
+      ptrQ[0] = (imgpel) iClip1 ( maxPel, R0 - dif );
+    }
+  }
+}
diff --git a/Src/h264dec/ldecod/src/macroblock.asm b/Src/h264dec/ldecod/src/macroblock.asm
new file mode 100644
index 00000000..0f96d2b5
--- /dev/null
+++ b/Src/h264dec/ldecod/src/macroblock.asm
@@ -0,0 +1,189 @@
+.686
+.XMM
+.model FLAT
+
+
+; inv_level_coefficients(blocks, InvLevelScale, qp_per) -- 32-bit x86, MASM flat model.
+; For 4 consecutive 32-byte groups starting at `blocks`, every non-zero signed
+; 16-bit coefficient c[k] (k = 0..15) of the group is replaced in place by the
+; low 16 bits of
+;     (((c[k] * scale[k]) << qp_per) + 8) >> 4        (arithmetic right shift)
+; where scale[k] is the k-th 32-bit integer at `InvLevelScale`. edx is never
+; advanced, so the same 16 scale entries are reused for all 4 groups.
+; The three arguments are read from the stack and `ret 0` leaves them for the
+; caller to remove. esi/edi are saved and restored; eax, ecx, edx are clobbered.
+; The "; NNNN :" remarks quote C source lines (presumably from the compiler
+; listing of the C version -- confirm); they describe a 4-column row loop,
+; whereas the code below is unrolled over all 16 coefficients of a group.
+PUBLIC _inv_level_coefficients
+_TEXT SEGMENT
+_blocks$ = 8 ; size = 4
+_InvLevelScale$ = 12 ; size = 4
+_qp_per$ = 16 ; size = 4
+_inv_level_coefficients PROC
+
+ mov eax, DWORD PTR _blocks$[esp-4] ; eax = blocks argument ([esp+4]); walks the coefficient groups
+ mov ecx, DWORD PTR _qp_per$[esp-4] ; ecx = qp_per argument ([esp+12]); cl is the left-shift count
+ mov edx, DWORD PTR _InvLevelScale$[esp-4] ; edx = InvLevelScale argument ([esp+8])
+ push esi
+ push edi
+ mov edi, 4 ; edi = number of 32-byte groups still to process
+$LL10@inv_level_:
+
+; 3870 : {
+; 3871 : h264_short_block_row_t *block = blocks[b];
+; 3872 : for (j = 0; j < 4; ++j)
+; 3873 : {
+; 3874 : if (block[j][0]) block[j][0]= rshift_rnd_sf((block[j][0] * InvLevelScale[j][0]) << qp_per, 4);
+
+ movsx esi, WORD PTR [eax+4-4] ; coefficient 0 (byte offset 0), sign-extended
+ test esi, esi
+ je SHORT $LN4@inv_level_ ; zero coefficients are left untouched
+ imul esi, DWORD PTR [edx] ; * scale[0]
+ shl esi, cl ; << qp_per
+ add esi, 8 ; rounding term for the shift by 4
+ sar esi, 4
+ mov WORD PTR [eax+4-4], si ; store the low 16 bits back in place
+$LN4@inv_level_:
+
+; 3875 : if (block[j][1]) block[j][1]= rshift_rnd_sf((block[j][1] * InvLevelScale[j][1]) << qp_per, 4);
+
+ movsx esi, WORD PTR [eax+4-2] ; coefficient 1 (byte offset 2); same pattern repeats for offsets 2..30
+ test esi, esi
+ je SHORT $LN3@inv_level_
+ imul esi, DWORD PTR [edx+4]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4-2], si
+$LN3@inv_level_:
+
+; 3876 : if (block[j][2]) block[j][2]= rshift_rnd_sf((block[j][2] * InvLevelScale[j][2]) << qp_per, 4);
+
+ movsx esi, WORD PTR [eax+4]
+ test esi, esi
+ je SHORT $LN2@inv_level_
+ imul esi, DWORD PTR [edx+8]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4], si
+$LN2@inv_level_:
+
+; 3877 : if (block[j][3]) block[j][3]= rshift_rnd_sf((block[j][3] * InvLevelScale[j][3]) << qp_per, 4);
+
+ movsx esi, WORD PTR [eax+4+2]
+ test esi, esi
+ je SHORT $LN6@inv_level_
+ imul esi, DWORD PTR [edx+12]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+2], si
+$LN6@inv_level_:
+ movsx esi, WORD PTR [eax+4+4] ; coefficients 4..15 follow without listing remarks
+ test esi, esi
+ je SHORT $LN27@inv_level_
+ imul esi, DWORD PTR [edx+16]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+4], si
+$LN27@inv_level_:
+ movsx esi, WORD PTR [eax+4+6]
+ test esi, esi
+ je SHORT $LN28@inv_level_
+ imul esi, DWORD PTR [edx+20]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+6], si
+$LN28@inv_level_:
+ movsx esi, WORD PTR [eax+4+8]
+ test esi, esi
+ je SHORT $LN29@inv_level_
+ imul esi, DWORD PTR [edx+24]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+8], si
+$LN29@inv_level_:
+ movsx esi, WORD PTR [eax+4+10]
+ test esi, esi
+ je SHORT $LN30@inv_level_
+ imul esi, DWORD PTR [edx+28]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+10], si
+$LN30@inv_level_:
+ movsx esi, WORD PTR [eax+4+12]
+ test esi, esi
+ je SHORT $LN32@inv_level_
+ imul esi, DWORD PTR [edx+32]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+12], si
+$LN32@inv_level_:
+ movsx esi, WORD PTR [eax+4+14]
+ test esi, esi
+ je SHORT $LN33@inv_level_
+ imul esi, DWORD PTR [edx+36]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+14], si
+$LN33@inv_level_:
+ movsx esi, WORD PTR [eax+4+16]
+ test esi, esi
+ je SHORT $LN34@inv_level_
+ imul esi, DWORD PTR [edx+40]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+16], si
+$LN34@inv_level_:
+ movsx esi, WORD PTR [eax+4+18]
+ test esi, esi
+ je SHORT $LN35@inv_level_
+ imul esi, DWORD PTR [edx+44]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+18], si
+$LN35@inv_level_:
+ movsx esi, WORD PTR [eax+4+20]
+ test esi, esi
+ je SHORT $LN37@inv_level_
+ imul esi, DWORD PTR [edx+48]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+20], si
+$LN37@inv_level_:
+ movsx esi, WORD PTR [eax+4+22]
+ test esi, esi
+ je SHORT $LN38@inv_level_
+ imul esi, DWORD PTR [edx+52]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+22], si
+$LN38@inv_level_:
+ movsx esi, WORD PTR [eax+4+24]
+ test esi, esi
+ je SHORT $LN39@inv_level_
+ imul esi, DWORD PTR [edx+56]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+24], si
+$LN39@inv_level_:
+ movsx esi, WORD PTR [eax+4+26] ; coefficient 15 (byte offset 30), last of the group
+ test esi, esi
+ je SHORT $LN9@inv_level_
+ imul esi, DWORD PTR [edx+60]
+ shl esi, cl
+ add esi, 8
+ sar esi, 4
+ mov WORD PTR [eax+4+26], si
+$LN9@inv_level_:
+ add eax, 32 ; 00000020H
+ sub edi, 1 ; one group done; loop until all 4 are processed
+ jne $LL10@inv_level_
+ pop edi
+ pop esi
+
+ ret 0
+_inv_level_coefficients ENDP
+
+END \ No newline at end of file
diff --git a/Src/h264dec/ldecod/src/macroblock.c b/Src/h264dec/ldecod/src/macroblock.c
new file mode 100644
index 00000000..10d083b1
--- /dev/null
+++ b/Src/h264dec/ldecod/src/macroblock.c
@@ -0,0 +1,6475 @@
+
+/*!
+***********************************************************************
+* \file macroblock.c
+*
+* \brief
+* Decode a Macroblock
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Inge Lille-Langøy <inge.lille-langoy@telenor.com>
+* - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se>
+* - Jani Lainema <jani.lainema@nokia.com>
+* - Sebastian Purreiter <sebastian.purreiter@mch.siemens.de>
+* - Thomas Wedi <wedi@tnt.uni-hannover.de>
+* - Detlev Marpe <marpe@hhi.de>
+* - Gabi Blaettermann
+* - Ye-Kui Wang <wyk@ieee.org>
+* - Lowell Winger <lwinger@lsil.com>
+* - Alexis Michael Tourapis <alexismt@ieee.org>
+***********************************************************************
+*/
+
+#include "contributors.h"
+
+#include <math.h>
+
+#include "block.h"
+#include "global.h"
+#include "mbuffer.h"
+#include "elements.h"
+#include "errorconcealment.h"
+#include "macroblock.h"
+#include "fmo.h"
+#include "cabac.h"
+#include "vlc.h"
+#include "image.h"
+#include "mb_access.h"
+#include "biaridecod.h"
+#include "transform8x8.h"
+#include "transform.h"
+#include "mc_prediction.h"
+#include "quant.h"
+#include "intra4x4_pred.h"
+#include "intra8x8_pred.h"
+#include "intra16x16_pred.h"
+#include "mv_prediction.h"
+#include "optim.h"
+#include "mb_prediction.h"
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+// Trace helpers: with TRACE non-zero they expand to tracing code, otherwise to nothing.
+//  - TRACE_STRING expects a struct named currSE in scope, TRACE_STRING_P a pointer named currSE.
+//  - Both use strncpy, which leaves the destination unterminated when s fills TRACESTRING_SIZE bytes.
+//  - TRACE_DECBITS ignores its argument and always passes 1 to dectracebitcnt().
+//  - TRACE_PRINTF writes into a buffer named type and its expansion already ends in ';'.
+#if TRACE
+#define TRACE_STRING(s) strncpy(currSE.tracestring, s, TRACESTRING_SIZE)
+#define TRACE_DECBITS(i) dectracebitcnt(1)
+#define TRACE_PRINTF(s) sprintf(type, "%s", s);
+#define TRACE_STRING_P(s) strncpy(currSE->tracestring, s, TRACESTRING_SIZE)
+#else
+#define TRACE_STRING(s)
+#define TRACE_DECBITS(i)
+#define TRACE_PRINTF(s)
+#define TRACE_STRING_P(s)
+#endif
+
+//! trace bit counter; declared here because TRACE_DECBITS expands to a call to it when TRACE is enabled
+void dectracebitcnt(int count);
+
+// Forward declarations of file-local routines. setup_slice_methods() installs the
+// read_motion_info_from_NAL_*, read_one_macroblock_* and decode_one_component_*
+// functions in the per-slice function pointers according to the slice type.
+static void read_motion_info_from_NAL_p_slice (Macroblock *currMB);
+static void read_motion_info_from_NAL_b_slice (Macroblock *currMB);
+static void read_ipred_modes (Macroblock *currMB);
+static void read_CBP_and_coeffs_from_NAL_CABAC (Macroblock *currMB);
+static void read_CBP_and_coeffs_from_NAL_CAVLC (Macroblock *currMB);
+static void read_IPCM_coeffs_from_NAL (Slice *currSlice, struct datapartition *dP);
+static void read_one_macroblock_i_slice (Macroblock *currMB);
+static void read_one_macroblock_p_slice (Macroblock *currMB);
+static void read_one_macroblock_b_slice (Macroblock *currMB);
+static int decode_one_component_i_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+static int decode_one_component_p_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+static int decode_one_component_b_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+static int decode_one_component_sp_slice (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, StorablePicture *dec_picture);
+
+/*!
+************************************************************************
+* \brief
+*    OR \a mask, shifted left by \a position bits, into the 64-bit word
+*    that \a x points to.
+*    32-bit x86 builds (_M_IX86) do the shift and the OR with MMX
+*    intrinsics; all other targets use plain 64-bit integer arithmetic.
+************************************************************************
+*/
+static inline void or_bits(int64 *x, int mask, int position)
+{
+#ifdef _M_IX86
+  __m64 shifted = _mm_slli_si64(_mm_cvtsi32_si64(mask), position);
+
+  *(__m64 *)x = _mm_or_si64(*(__m64 *)x, shifted);
+#else
+  int64 shifted = (int64) mask << position;
+
+  *x |= shifted;
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    Map a block type to the context index used for reference frames:
+*    1 for block types 4 and above, 0 for everything below 4.
+************************************************************************
+*/
+static inline int BType2CtxRef (int btype)
+{
+  if (btype < 4)
+    return 0;
+
+  return 1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read one reference picture index with readSyntaxElement_UVLC().
+*    \a list is stored in currSE->value2 before parsing; the parsed
+*    currSE->value1 is returned converted to char.
+************************************************************************
+*/
+static char readRefPictureIdx_VLC(SyntaxElement *currSE, DataPartition *dP, int list)
+{
+#if TRACE
+  char label[20];
+
+  sprintf(label, "ref_idx_l%d", list);
+  strncpy(currSE->tracestring, label, TRACESTRING_SIZE);
+#endif
+
+  currSE->value2 = list;
+  readSyntaxElement_UVLC(currSE, dP);
+
+  return (char) currSE->value1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read one reference picture index as a single fixed-length bit.
+*    The stored and returned value is 1 minus the value obtained from
+*    readSyntaxElement_FLC(), so a coded 1 yields index 0 and a coded 0
+*    yields index 1.
+************************************************************************
+*/
+static char readRefPictureIdx_FLC(SyntaxElement *currSE, DataPartition *dP, int list)
+{
+#if TRACE
+  char label[20];
+
+  sprintf(label, "ref_idx_l%d", list);
+  strncpy(currSE->tracestring, label, TRACESTRING_SIZE);
+#endif
+
+  currSE->value1 = 1 - readSyntaxElement_FLC(dP->bitstream, 1);
+
+  return (char) currSE->value1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Placeholder reader for reference picture indices: nothing is read
+*    and the arguments are not used; the index is always 0.
+************************************************************************
+*/
+static char readRefPictureIdx_Null(SyntaxElement *currSE, DataPartition *dP, int list)
+{
+  const char no_index = 0;
+
+  return no_index;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Select the reader stored in currMB->readRefPictureIdx.
+*    The null reader is the default. With more than one active reference
+*    currSE->mapping is set to linfo_ue and, if indices are present, the
+*    single-bit reader is chosen for exactly two active references and
+*    the UVLC reader otherwise.
+************************************************************************
+*/
+static void prepareListforRefIdx ( Macroblock *currMB, SyntaxElement *currSE, int num_ref_idx_active, int refidx_present)
+{
+  currMB->readRefPictureIdx = readRefPictureIdx_Null;
+
+  if (num_ref_idx_active <= 1)
+    return;
+
+  currSE->mapping = linfo_ue;
+
+  if (!refidx_present)
+    return;
+
+  currMB->readRefPictureIdx = (num_ref_idx_active == 2) ? readRefPictureIdx_FLC
+                                                        : readRefPictureIdx_VLC;
+}
+
+// C version of set_chroma_qp(): compiled for debug and x64 builds only; other
+// builds just see the prototype and take the definition from elsewhere.
+#if defined(_DEBUG) || defined(_M_X64)
+/*!
+************************************************************************
+* \brief
+*    Derive both chroma QPs of a macroblock from its luma QP.
+*    For each chroma plane the luma QP plus the picture's chroma offset
+*    is clipped to [-bitdepth_chroma_qp_scale, 51]; non-negative results
+*    are mapped through QP_SCALE_CR, negative ones are kept. The scaled
+*    QP (qp_scaled[1], qp_scaled[2]) adds bitdepth_chroma_qp_scale.
+************************************************************************
+*/
+void set_chroma_qp(Macroblock* currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  int plane;
+
+  for (plane = 0; plane < 2; ++plane)
+  {
+    currMB->qpc[plane] = iClip3 ( -p_Vid->bitdepth_chroma_qp_scale, 51, currMB->qp + dec_picture->chroma_qp_offset[plane] );
+
+    if (currMB->qpc[plane] >= 0)
+    {
+      currMB->qpc[plane] = QP_SCALE_CR[currMB->qpc[plane]];
+    }
+
+    currMB->qp_scaled[plane + 1] = currMB->qpc[plane] + p_Vid->bitdepth_chroma_qp_scale;
+  }
+}
+#else
+void set_chroma_qp(Macroblock* currMB);
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Store a new luma QP in the macroblock and refresh everything that
+*    depends on it: the scaled luma QP, the chroma QPs (set_chroma_qp)
+*    and the lossless flag, which is set only when the scaled luma QP is
+*    0 and lossless_qpprime_flag is 1.
+************************************************************************
+*/
+static inline void update_qp(Macroblock *currMB, int qp)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  currMB->qp           = qp;
+  currMB->qp_scaled[0] = p_Vid->bitdepth_luma_qp_scale + qp;
+
+  set_chroma_qp(currMB);
+
+  currMB->is_lossless = (Boolean) ((p_Vid->lossless_qpprime_flag == 1) && (currMB->qp_scaled[0] == 0));
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read mb_qp_delta as a signed UVLC element and apply it.
+*    The incoming \a dP is not used: the partition is looked up again
+*    through partMap[type]. error() is called when the delta is outside
+*    [-(26 + scale/2), 25 + scale/2]; the picture QP is then updated with
+*    wrap-around and passed to update_qp().
+************************************************************************
+*/
+static void read_delta_quant_CAVLC(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, const byte *partMap, int type)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+  const int luma_scale = p_Vid->bitdepth_luma_qp_scale;
+  const int min_delta = -(26 + luma_scale/2);
+  const int max_delta = 25 + luma_scale/2;
+
+  dP = &(currSlice->partArr[partMap[type]]);
+
+  currSE->mapping = linfo_se;
+  readSyntaxElement_UVLC(currSE, dP);
+  currMB->delta_quant = (short) currSE->value1;
+
+  if ((currMB->delta_quant < min_delta) || (currMB->delta_quant > max_delta))
+    error ("mb_qp_delta is out of range", 500);
+
+  p_Vid->qp = ((p_Vid->qp + currMB->delta_quant + 52 + 2*luma_scale) % (52 + luma_scale)) - luma_scale;
+  update_qp(currMB, p_Vid->qp);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read mb_qp_delta with the CABAC decoder and apply it.
+*    \a currSE is not used and the incoming \a dP is ignored: the
+*    partition is looked up again through partMap[type]. error() is
+*    called when the delta is outside [-(26 + scale/2), 25 + scale/2];
+*    the picture QP is then updated with wrap-around and passed to
+*    update_qp().
+************************************************************************
+*/
+static inline void read_delta_quant_CABAC(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, const byte *partMap, int type)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  dP = &(currSlice->partArr[partMap[type]]);
+
+  currMB->delta_quant = readDquant_CABAC(currSlice, &dP->de_cabac);
+
+  if ((currMB->delta_quant < -(26 + p_Vid->bitdepth_luma_qp_scale/2)) || (currMB->delta_quant > (25 + p_Vid->bitdepth_luma_qp_scale/2)))
+    error ("mb_qp_delta is out of range", 500);
+
+  // wrap the new QP into the legal range before propagating it
+  p_Vid->qp = ((p_Vid->qp + currMB->delta_quant + 52 + 2*p_Vid->bitdepth_luma_qp_scale)%(52+p_Vid->bitdepth_luma_qp_scale)) - p_Vid->bitdepth_luma_qp_scale;
+  update_qp(currMB, p_Vid->qp);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the reference picture indices of one macroblock for \a list.
+*    The macroblock is walked in partitions of step_h0 x step_v0 4x4
+*    blocks. For each partition whose 8x8 block is coded (b8mode != 0)
+*    and predicted from \a list or bi-predicted, one index is read via
+*    currMB->readRefPictureIdx and written to every 4x4 cell of the
+*    partition in \a motion.
+************************************************************************
+*/
+static void readMBRefPictureIdx(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0, int step_h0)
+{
+  int part_y, part_x, row, col, b8;
+  char ref_idx;
+
+  for (part_y = 0; part_y < 4; part_y += step_v0)
+  {
+    currMB->subblock_y = part_y << 2;
+
+    for (part_x = 0; part_x < 4; part_x += step_h0)
+    {
+      currMB->subblock_x = part_x << 2;
+      b8 = 2 * (part_y >> 1) + (part_x >> 1);
+
+      if (currMB->b8mode[b8] == 0)
+        continue;
+      if (currMB->b8pdir[b8] != list && currMB->b8pdir[b8] != BI_PRED)
+        continue;
+
+      ref_idx = currMB->readRefPictureIdx(currSE, dP, list);
+
+      for (row = part_y; row < part_y + step_v0; ++row)
+      {
+        for (col = 0; col < step_h0; ++col)
+        {
+          motion[row][currMB->block_x + part_x + col].ref_idx = ref_idx;
+        }
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    CABAC reference index reader for partitions one 4x4 block wide.
+*    For every column and every step_v0 rows, an index is decoded with
+*    readRefFrame_CABAC() when the covering 8x8 block is coded and uses
+*    \a list (or is bi-predicted); it is stored in the step_v0 rows of
+*    that column.
+************************************************************************
+*/
+static void readMBRefPictureIdx_CABAC1(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0)
+{
+  int part_y, part_x, row, b8;
+  char ref_idx;
+
+  for (part_y = 0; part_y < 4; part_y += step_v0)
+  {
+    currMB->subblock_y = part_y << 2;
+
+    for (part_x = 0; part_x < 4; ++part_x)
+    {
+      currMB->subblock_x = part_x << 2;
+      b8 = 2 * (part_y >> 1) + (part_x >> 1);
+
+      if (currMB->b8mode[b8] == 0)
+        continue;
+      if (currMB->b8pdir[b8] != list && currMB->b8pdir[b8] != BI_PRED)
+        continue;
+
+      ref_idx = readRefFrame_CABAC(currMB, &dP->de_cabac, list, part_x << 2, part_y << 2);
+
+      for (row = part_y; row < part_y + step_v0; ++row)
+      {
+        motion[row][currMB->block_x + part_x].ref_idx = ref_idx;
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    CABAC reference index reader for partitions two 4x4 blocks wide.
+*    Each group of step_v0 rows is handled as a left half (columns 0-1,
+*    decoded with readRefFrame_CABAC0) and a right half (columns 2-3,
+*    decoded with readRefFrame_CABAC at x = 8). A half is read only when
+*    its 8x8 block is coded and uses \a list or is bi-predicted.
+************************************************************************
+*/
+static void readMBRefPictureIdx_CABAC2(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0)
+{
+  int part_y, row, b8;
+  char ref_idx;
+
+  for (part_y = 0; part_y < 4; part_y += step_v0)
+  {
+    currMB->subblock_y = part_y << 2;
+
+    /* left half: 4x4 columns 0 and 1 */
+    currMB->subblock_x = 0;
+    b8 = 2 * (part_y >> 1);
+
+    if (currMB->b8mode[b8] != 0 && (currMB->b8pdir[b8] == list || currMB->b8pdir[b8] == BI_PRED))
+    {
+      ref_idx = readRefFrame_CABAC0(currMB, &dP->de_cabac, list, part_y << 2);
+
+      for (row = part_y; row < part_y + step_v0; ++row)
+      {
+        motion[row][currMB->block_x + 0].ref_idx = ref_idx;
+        motion[row][currMB->block_x + 1].ref_idx = ref_idx;
+      }
+    }
+
+    /* right half: 4x4 columns 2 and 3 */
+    currMB->subblock_x = 8;
+    b8 = 2 * (part_y >> 1) + 1;
+
+    if (currMB->b8mode[b8] != 0 && (currMB->b8pdir[b8] == list || currMB->b8pdir[b8] == BI_PRED))
+    {
+      ref_idx = readRefFrame_CABAC(currMB, &dP->de_cabac, list, 8, part_y << 2);
+
+      for (row = part_y; row < part_y + step_v0; ++row)
+      {
+        motion[row][currMB->block_x + 2].ref_idx = ref_idx;
+        motion[row][currMB->block_x + 3].ref_idx = ref_idx;
+      }
+    }
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    CABAC reference index reader for partitions spanning the full
+*    macroblock width. One index per step_v0 rows is decoded with
+*    readRefFrame_CABAC0() when the left 8x8 block of those rows is coded
+*    and uses \a list (or is bi-predicted); it is written to all four
+*    4x4 columns.
+************************************************************************
+*/
+static void readMBRefPictureIdx_CABAC4(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0)
+{
+  int part_y, row, col, b8;
+  char ref_idx;
+
+  for (part_y = 0; part_y < 4; part_y += step_v0)
+  {
+    currMB->subblock_y = part_y << 2;
+    currMB->subblock_x = 0;
+    b8 = part_y & ~1;
+
+    if (currMB->b8mode[b8] == 0)
+      continue;
+    if (currMB->b8pdir[b8] != list && currMB->b8pdir[b8] != BI_PRED)
+      continue;
+
+    ref_idx = readRefFrame_CABAC0(currMB, &dP->de_cabac, list, part_y << 2);
+
+    for (row = part_y; row < part_y + step_v0; ++row)
+    {
+      for (col = 0; col < 4; ++col)
+      {
+        motion[row][currMB->block_x + col].ref_idx = ref_idx;
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Dispatch to the CABAC reference index reader specialised for the
+*    horizontal partition width \a step_h0 (1, 2 or 4 blocks of 4x4).
+*    Any other width reads nothing.
+************************************************************************
+*/
+static void readMBRefPictureIdx_CABAC(DataPartition *dP, Macroblock *currMB, PicMotion **motion, int list, int step_v0, int step_h0)
+{
+  if (step_h0 == 1)
+  {
+    readMBRefPictureIdx_CABAC1(dP, currMB, motion, list, step_v0);
+  }
+  else if (step_h0 == 2)
+  {
+    readMBRefPictureIdx_CABAC2(dP, currMB, motion, list, step_v0);
+  }
+  else if (step_h0 == 4)
+  {
+    readMBRefPictureIdx_CABAC4(dP, currMB, motion, list, step_v0);
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Set the reference index to 0, without reading the bitstream, for
+*    every partition of step_h0 x step_v0 4x4 blocks whose 8x8 block is
+*    coded and uses \a list or is bi-predicted. Other cells of \a motion
+*    are left untouched.
+************************************************************************
+*/
+static void readMBRefPictureIdx_CABAC_NoReference(Macroblock *currMB, PicMotion **motion, int list, int step_v0, int step_h0)
+{
+  int part_y, part_x, row, col, b8;
+
+  for (part_y = 0; part_y < 4; part_y += step_v0)
+  {
+    for (part_x = 0; part_x < 4; part_x += step_h0)
+    {
+      b8 = 2 * (part_y >> 1) + (part_x >> 1);
+
+      if (currMB->b8mode[b8] == 0)
+        continue;
+      if (currMB->b8pdir[b8] != list && currMB->b8pdir[b8] != BI_PRED)
+        continue;
+
+      for (row = part_y; row < part_y + step_v0; ++row)
+      {
+        for (col = 0; col < step_h0; ++col)
+        {
+          motion[row][currMB->block_x + part_x + col].ref_idx = 0;
+        }
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+* Function to read the motion vectors of one macroblock for one list
+* using readSyntaxElement_UVLC.
+* The macroblock is walked in partitions of step_h0 x step_v0 4x4 blocks;
+* partitions whose 8x8 block is not coded or does not use \a list are
+* skipped. Inside a partition, sub-blocks of step_h x step_v (taken from
+* BLOCK_STEP for the 8x8 mode) each get a predictor from GetMVPredictor,
+* two decoded difference components, and the sum is stored in the
+* picture motion field; the differences are stored in currMB->mvd.
+************************************************************************
+*/
+static void readMBMotionVectors(SyntaxElement *currSE, DataPartition *dP, Macroblock *currMB, int list, int step_h0, int step_v0)
+{
+ int i, j, k, i4, j4, ii, jj, kk, i0, j0;
+ short curr_mvd[2], curr_mv[2], pred_mv[2];
+ MotionVector (*mvd)[4];
+ //MotionVector **mv;
+ int mv_mode, step_h, step_v;
+ char cur_ref_idx;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ PicMotionParams *motion = &dec_picture->motion;
+ PixelPos block[4]; // neighbor blocks
+
+
+ for (j0=0; j0<4; j0+=step_v0)
+ {
+ for (i0=0; i0<4; i0+=step_h0)
+ {
+ kk = 2 * (j0 >> 1) + (i0 >> 1); // index of the 8x8 block containing 4x4 block (i0, j0)
+ if ((currMB->b8pdir[kk]== list || currMB->b8pdir[kk]== BI_PRED) && (currMB->b8mode[kk] !=0))//has forward vector
+ {
+ PicMotion **list_motion = motion->motion[list];
+ cur_ref_idx = list_motion[currMB->block_y+j0][currMB->block_x+i0].ref_idx; // reference index already stored for this partition
+ mv_mode = currMB->b8mode[kk];
+ step_h = BLOCK_STEP [mv_mode][0]; // sub-block width in 4x4 units
+ step_v = BLOCK_STEP [mv_mode][1]; // sub-block height in 4x4 units
+
+ for (j = j0; j < j0 + step_v0; j += step_v)
+ {
+ PicMotion **mv;
+ currMB->subblock_y = j << 2; // position used for context determination
+ j4 = currMB->block_y + j;
+ mv = &list_motion[j4]; // mv[jj] is picture row j4 + jj
+ mvd = &currMB->mvd [list][j]; // mvd[jj] is macroblock row j + jj
+ for (i = i0; i < i0 + step_h0; i += step_h)
+ {
+ currMB->subblock_x = i << 2; // position used for context determination
+ i4 = currMB->block_x + i;
+
+ get_neighbors(currMB, block, BLOCK_SIZE * i, BLOCK_SIZE * j, 4 * step_h);
+
+ // first make mv-prediction
+ currMB->GetMVPredictor (currMB, block, pred_mv, cur_ref_idx, list_motion, BLOCK_SIZE * i, BLOCK_SIZE * j, 4 * step_h, 4 * step_v);
+
+ for (k=0; k < 2; ++k)
+ {
+ currSE->value2 = (k << 1) + list; // identifies the component; only used for context determination
+ readSyntaxElement_UVLC(currSE, dP);
+ curr_mvd[k] = (short) currSE->value1;
+ curr_mv [k] = (short)(curr_mvd[k] + pred_mv[k]); // compute motion vector
+ }
+
+ // Init motion vectors
+ for(jj = 0; jj < step_v; ++jj)
+ {
+ for(ii = i4; ii < i4 + step_h; ++ii)
+ {
+ memcpy(&mv[jj][ii].mv, curr_mv, sizeof(MotionVector));
+ }
+ }
+
+ // Init first line (mvd)
+ for(ii = i; ii < i + step_h; ++ii)
+ {
+ memcpy(mvd[0][ii], curr_mvd, sizeof(MotionVector));
+ }
+
+ // now copy all other lines
+ for(jj = 1; jj < step_v; ++jj)
+ {
+ memcpy(mvd[jj][i], mvd[0][i], step_h * sizeof(MotionVector));
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the motion vectors of one macroblock for one list with the
+*    CABAC decoder.
+*    The macroblock is walked in partitions of step_h0 x step_v0 4x4
+*    blocks; partitions whose 8x8 block is not coded or does not use
+*    \a list are skipped. Inside a partition, sub-blocks of
+*    step_h x step_v (taken from BLOCK_STEP for the 8x8 mode) each get a
+*    predictor from GetMVPredictor and two components decoded by
+*    readMVD_CABAC; predictor + difference is stored in the picture
+*    motion field and the difference in currMB->mvd.
+*    The two-component vectors are copied with memcpy rather than through
+*    int32_t pointer casts, which would break the aliasing rules for
+*    short arrays.
+************************************************************************
+*/
+static void readMBMotionVectors_CABAC(DataPartition *dP, Macroblock *currMB, int list, int step_h0, int step_v0)
+{
+  int i, j, k, i4, j4, ii, jj, kk, i0, j0;
+  short curr_mvd[2], curr_mv[2], pred_mv[2];
+  MotionVector (*mvd)[4];
+  int mv_mode, step_h, step_v;
+  char cur_ref_idx;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  PicMotionParams *motion = &dec_picture->motion;
+  PixelPos block[4]; // neighbor blocks
+
+  for (j0=0; j0<4; j0+=step_v0)
+  {
+    for (i0=0; i0<4; i0+=step_h0)
+    {
+      kk = (j0 & ~1) + (i0 >> 1); // index of the 8x8 block containing 4x4 block (i0, j0)
+      if ((currMB->b8pdir[kk]== list || currMB->b8pdir[kk]== BI_PRED) && (currMB->b8mode[kk] !=0))//has forward vector
+      {
+        PicMotion **list_motion = motion->motion[list];
+        cur_ref_idx = list_motion[currMB->block_y+j0][currMB->block_x+i0].ref_idx;
+        mv_mode = currMB->b8mode[kk];
+        step_h = BLOCK_STEP [mv_mode][0];
+        step_v = BLOCK_STEP [mv_mode][1];
+
+        for (j = j0; j < j0 + step_v0; j += step_v)
+        {
+          PicMotion **mv;
+          int block_j = j << 2;
+          currMB->subblock_y = block_j; // position used for context determination
+          j4 = currMB->block_y + j;
+          mv = &list_motion[j4];
+          mvd = &currMB->mvd [list][j];
+          for (i = i0; i < i0 + step_h0; i += step_h)
+          {
+            int block_i=i << 2;
+            currMB->subblock_x = block_i; // position used for context determination
+            i4 = currMB->block_x + i;
+
+            get_neighbors(currMB, block, block_i, block_j, 4 * step_h);
+
+            // first make mv-prediction
+            currMB->GetMVPredictor (currMB, block, pred_mv, cur_ref_idx, list_motion, block_i, block_j, 4 * step_h, 4 * step_v);
+
+            for (k=0; k < 2; ++k)
+            {
+              curr_mvd[k] = (short)readMVD_CABAC(currMB, &dP->de_cabac, k, list, block_i, block_j);
+              curr_mv [k] = (short)(curr_mvd[k] + pred_mv[k]); // compute motion vector
+            }
+
+            // Init motion vectors (both components, same bytes the 32-bit store used to write)
+            for(jj = 0; jj < step_v; ++jj)
+            {
+              for(ii = i4; ii < i4 + step_h; ++ii)
+              {
+                memcpy(&mv[jj][ii].mv, curr_mv, sizeof(curr_mv));
+              }
+            }
+
+            // Init first line (mvd)
+            for(ii = i; ii < i + step_h; ++ii)
+            {
+              memcpy(mvd[0][ii], curr_mvd, sizeof(curr_mvd));
+            }
+
+            // now copy all other lines
+            for(jj = 1; jj < step_v; ++jj)
+            {
+              memcpy_amd(mvd[jj][i], mvd[0][i], step_h * sizeof(MotionVector));
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+* initializes the current macroblock
+* Selects p_Vid->mb_data[p_Vid->current_mb_nr], returns it through
+* \a currMB, derives its macroblock, block and pixel coordinates, records
+* the slice number, checks neighbour availability, installs the MV
+* predictor and CBP routines and resets the per-macroblock syntax state.
+************************************************************************
+*/
+void start_macroblock(Slice *currSlice, Macroblock **currMB)
+{
+ VideoParameters *p_Vid = currSlice->p_Vid;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ int mb_nr = p_Vid->current_mb_nr;
+ Macroblock *mb = &p_Vid->mb_data[mb_nr]; // initialization code deleted, see below, StW
+ *currMB = mb;
+
+ mb->p_Vid = p_Vid;
+ mb->p_Slice = currSlice;
+ mb->mbAddrX = mb_nr;
+
+ //assert (mb_nr < (int) p_Vid->PicSizeInMbs);
+
+ /* Update coordinates of the current macroblock */
+ if (currSlice->mb_aff_frame_flag)
+ {
+ mb->mb_x = (mb_nr) % ((2*p_Vid->width) / MB_BLOCK_SIZE); // position within a row holding two entries per macroblock column
+ mb->mb_y = 2*((mb_nr) / ((2*p_Vid->width) / MB_BLOCK_SIZE));
+
+ mb->mb_y += (mb->mb_x & 0x01); // odd entries are the lower macroblock of a pair
+ mb->mb_x >>= 1;
+ }
+ else
+ {
+ mb->mb_x = p_Vid->PicPos[mb_nr][0]; // coordinates come from the PicPos table
+ mb->mb_y = p_Vid->PicPos[mb_nr][1];
+ }
+
+ /* Define vertical positions */
+ mb->block_y = mb->mb_y * BLOCK_SIZE; /* luma block position */
+ mb->block_y_aff = mb->block_y;
+ mb->pix_y = mb->mb_y * MB_BLOCK_SIZE; /* luma macroblock position */
+ mb->pix_c_y = mb->mb_y * p_Vid->mb_cr_size_y; /* chroma macroblock position */
+
+ /* Define horizontal positions */
+ mb->block_x = mb->mb_x * BLOCK_SIZE; /* luma block position */
+ mb->pix_x = mb->mb_x * MB_BLOCK_SIZE; /* luma pixel position */
+ mb->pix_c_x = mb->mb_x * p_Vid->mb_cr_size_x; /* chroma pixel position */
+
+ // Save the slice number of this macroblock. When the macroblock below
+ // is coded it will use this to decide if prediction for above is possible
+ mb->slice_nr = (short) p_Vid->current_slice_nr;
+
+ if (p_Vid->current_slice_nr >= MAX_NUM_SLICES) // checked only after slice_nr has been stored above
+ {
+ error ("Maximum number of supported slices exceeded. \nPlease recompile with increased value for MAX_NUM_SLICES", 200);
+ }
+
+ dec_picture->slice_id[mb->mb_y][mb->mb_x] = (short) p_Vid->current_slice_nr;
+ dec_picture->max_slice_id = (short) imax(p_Vid->current_slice_nr, dec_picture->max_slice_id);
+
+ CheckAvailabilityOfNeighbors(mb); // called after the position and slice fields above have been filled in
+
+ // Select appropriate MV predictor function
+ init_motion_vector_prediction(*currMB, currSlice->mb_aff_frame_flag);
+
+ set_read_and_store_CBP(currMB, currSlice->active_sps->chroma_format_idc);
+
+ // Reset syntax element entries in MB struct
+ update_qp(*currMB, p_Vid->qp);
+ mb->mb_type = 0;
+ mb->delta_quant = 0;
+ mb->cbp = 0;
+ mb->c_ipred_mode = DC_PRED_8; //GB
+
+ if (currSlice->slice_type != I_SLICE) // mvd is left untouched for I slices
+ {
+ if (currSlice->slice_type != B_SLICE)
+ memzero64(mb->mvd);//, BLOCK_MULTIPLE * BLOCK_MULTIPLE * 2 * sizeof(short));
+ else
+ memzero128(mb->mvd);//, 2 * BLOCK_MULTIPLE * BLOCK_MULTIPLE * 2 * sizeof(short));
+ }
+
+ memzero24(mb->cbp_blk); // presumably clears 24 bytes each -- confirm against the memzero24 definition
+ memzero24(mb->cbp_bits);
+ memzero24(mb->cbp_bits_8x8);
+
+ // clear currSlice->mb_rres8
+ memset(currSlice->mb_rres8, 0, sizeof(currSlice->mb_rres8));
+
+ // store filtering parameters for this MB
+ mb->DFDisableIdc = currSlice->DFDisableIdc;
+ mb->DFAlphaC0Offset = currSlice->DFAlphaC0Offset;
+ mb->DFBetaOffset = currSlice->DFBetaOffset;
+
+}
+
+/*!
+************************************************************************
+* \brief
+*    set coordinates of the next macroblock
+*    check end_of_slice condition
+*
+* \return
+*    TRUE when the slice (or the picture) ends after the macroblock just
+*    decoded, FALSE when decoding continues with the macroblock number
+*    now stored in p_Vid->current_mb_nr.
+*
+* \note
+*    nal_startcode_follows() is always called at the end of a slice
+*    group, outside the assert, so debug and NDEBUG builds make the same
+*    calls.
+************************************************************************
+*/
+Boolean exit_macroblock(Slice *currSlice, int eos_bit)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+
+  //! The if() statement below resembles the original code, which tested
+  //! p_Vid->current_mb_nr == p_Vid->PicSizeInMbs. Both are, of course, nonsense.
+  //! In an error prone environment, one can only be sure to have a new
+  //! picture by checking the tr of the next slice header!
+
+  ++(p_Vid->num_dec_mb);
+
+  if (p_Vid->num_dec_mb == p_Vid->PicSizeInMbs)
+  {
+    return TRUE;
+  }
+
+  p_Vid->current_mb_nr = FmoGetNextMBNr (p_Vid, p_Vid->current_mb_nr);
+
+  if (p_Vid->current_mb_nr == -1) // End of Slice group, MUST be end of slice
+  {
+    int startcode_follows = currSlice->nal_startcode_follows (currSlice, eos_bit);
+
+    assert (startcode_follows == TRUE);
+    (void) startcode_follows; // only read by the assert; silences the NDEBUG warning
+    return TRUE;
+  }
+
+  if (currSlice->nal_startcode_follows(currSlice, eos_bit) == FALSE)
+    return FALSE;
+
+  // a start code follows: I/SI slices and CABAC streams end here
+  if (currSlice->slice_type == I_SLICE || currSlice->slice_type == SI_SLICE || p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
+    return TRUE;
+
+  // otherwise the slice ends only when no skipped macroblocks are still pending
+  if (p_Vid->cod_counter <= 0)
+    return TRUE;
+
+  return FALSE;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Translate the mb_type value parsed in a P slice into the internal
+*    macroblock type and fill b8mode/b8pdir (and cbp/i16mode for intra
+*    16x16 and IPCM).
+*    Values 0-3 are kept as inter modes, 4 and 5 become P8x8 (5 also sets
+*    allrefzero), 6 is intra 4x4, 31 is IPCM and everything else is
+*    treated as intra 16x16 with offset mb_type - 7.
+************************************************************************
+*/
+static void interpret_mb_mode_P(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  static const int ICBPTAB[6] = {0,16,32,15,31,47};
+  int mbmode = currMB->mb_type;
+
+#define ZERO_P8x8 (mbmode==5)
+#define MODE_IS_P8x8 (mbmode==4 || mbmode==5)
+#define MODE_IS_I4x4 (mbmode==6)
+#define I16OFFSET (mbmode-7)
+#define MODE_IS_IPCM (mbmode==31)
+
+  if (mbmode < 4)
+  {
+    currMB->mb_type = mbmode;
+    memset(currMB->b8mode, mbmode, 4 * sizeof(char));
+    memset(currMB->b8pdir, 0, 4 * sizeof(char));
+    return;
+  }
+
+  if (MODE_IS_P8x8)
+  {
+    currMB->mb_type = P8x8;
+    p_Vid->allrefzero = ZERO_P8x8;
+    return;
+  }
+
+  if (MODE_IS_I4x4)
+  {
+    currMB->mb_type = I4MB;
+    memset(currMB->b8mode, IBLOCK, 4 * sizeof(char));
+    memset(currMB->b8pdir, -1, 4 * sizeof(char));
+    return;
+  }
+
+  // remaining values: IPCM or intra 16x16; both clear b8mode and mark b8pdir as -1
+  if (MODE_IS_IPCM)
+  {
+    currMB->mb_type = IPCM;
+    currMB->cbp = -1;
+    currMB->i16mode = 0;
+  }
+  else
+  {
+    currMB->mb_type = I16MB;
+    currMB->cbp = ICBPTAB[(I16OFFSET)>>2];
+    currMB->i16mode = (I16OFFSET) & 0x03;
+  }
+
+  memset(currMB->b8mode, 0, 4 * sizeof(char));
+  memset(currMB->b8pdir, -1, 4 * sizeof(char));
+}
+
+/*!
+************************************************************************
+* \brief
+*    Translate the mb_type value parsed in an I slice into the internal
+*    macroblock type and fill b8mode/b8pdir.
+*    0 is intra 4x4, 25 is IPCM and every other value is treated as
+*    intra 16x16, with cbp and i16mode derived from mb_type - 1.
+************************************************************************
+*/
+static void interpret_mb_mode_I(Macroblock *currMB)
+{
+  static const int ICBPTAB[6] = {0,16,32,15,31,47};
+  const int raw_type = currMB->mb_type;
+
+  if (raw_type == 0)
+  {
+    currMB->mb_type = I4MB;
+    memset(currMB->b8mode, IBLOCK, 4 * sizeof(char));
+    memset(currMB->b8pdir, -1, 4 * sizeof(char));
+    return;
+  }
+
+  if (raw_type == 25)
+  {
+    currMB->mb_type = IPCM;
+    currMB->cbp = -1;
+    currMB->i16mode = 0;
+  }
+  else
+  {
+    const int i16_offset = raw_type - 1;
+
+    currMB->mb_type = I16MB;
+    currMB->cbp = ICBPTAB[i16_offset >> 2];
+    currMB->i16mode = i16_offset & 0x03;
+  }
+
+  // IPCM and intra 16x16 share the same block mode / direction setup
+  memset(currMB->b8mode, 0, 4 * sizeof(char));
+  memset(currMB->b8pdir, -1, 4 * sizeof(char));
+}
+
+/*!
+************************************************************************
+* \brief
+*    Translate the mb_type value parsed in a B slice into the internal
+*    macroblock type and fill b8mode/b8pdir.
+*    0 is direct, values below 4 are 16x16, 22 is P8x8 (block modes and
+*    directions are left for the caller to read), 23 is intra 4x4,
+*    24-47 intra 16x16, 48 IPCM; the remaining even values are 16x8 and
+*    the remaining odd values 8x16, with the prediction direction of each
+*    half taken from the offset2pdir tables.
+************************************************************************
+*/
+static void interpret_mb_mode_B(Macroblock *currMB)
+{
+  static const int offset2pdir16x16[12] = {0, 0, 1, 2, 0,0,0,0,0,0,0,0};
+  static const int offset2pdir16x8[22][2] = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,1},{0,0},{0,1},{0,0},{1,0},
+                                             {0,0},{0,2},{0,0},{1,2},{0,0},{2,0},{0,0},{2,1},{0,0},{2,2},{0,0}};
+  static const int offset2pdir8x16[22][2] = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,1},{0,0},{0,1},{0,0},
+                                             {1,0},{0,0},{0,2},{0,0},{1,2},{0,0},{2,0},{0,0},{2,1},{0,0},{2,2}};
+
+  static const int ICBPTAB[6] = {0,16,32,15,31,47};
+
+  int new_type;
+  const int mbtype = currMB->mb_type;
+
+  if (mbtype == 0) // direct
+  {
+    new_type = 0;
+    memset(currMB->b8mode, 0, 4 * sizeof(char));
+    memset(currMB->b8pdir, 2, 4 * sizeof(char));
+  }
+  else if (mbtype < 4) // 16x16
+  {
+    new_type = 1;
+    memset(currMB->b8mode, 1, 4 * sizeof(char));
+    memset(currMB->b8pdir, offset2pdir16x16[mbtype], 4 * sizeof(char));
+  }
+  else if (mbtype == 22) // 8x8(+split)
+  {
+    new_type = P8x8; // b8mode and pdir is transmitted in additional codewords
+  }
+  else if (mbtype == 23) // intra4x4
+  {
+    new_type = I4MB;
+    memset(currMB->b8mode, IBLOCK, 4 * sizeof(char));
+    memset(currMB->b8pdir, -1, 4 * sizeof(char));
+  }
+  else if ((mbtype > 23) && (mbtype < 48)) // intra16x16
+  {
+    new_type = I16MB;
+    memset(currMB->b8mode, 0, 4 * sizeof(char));
+    memset(currMB->b8pdir, -1, 4 * sizeof(char));
+
+    currMB->cbp = ICBPTAB[(mbtype-24)>>2];
+    currMB->i16mode = (mbtype-24) & 0x03;
+  }
+  else if (mbtype == 48) // IPCM
+  {
+    new_type = IPCM;
+    memset(currMB->b8mode, 0, 4 * sizeof(char));
+    memset(currMB->b8pdir, -1, 4 * sizeof(char));
+
+    currMB->cbp = -1;
+    currMB->i16mode = 0;
+  }
+  else if ((mbtype & 0x01) == 0) // 16x8: upper and lower half each get one direction
+  {
+    new_type = 2;
+    memset(currMB->b8mode, 2, 4 * sizeof(char));
+    currMB->b8pdir[0] = (char) offset2pdir16x8[mbtype][0];
+    currMB->b8pdir[1] = (char) offset2pdir16x8[mbtype][0];
+    currMB->b8pdir[2] = (char) offset2pdir16x8[mbtype][1];
+    currMB->b8pdir[3] = (char) offset2pdir16x8[mbtype][1];
+  }
+  else // 8x16: left and right half each get one direction
+  {
+    new_type = 3;
+    memset(currMB->b8mode, 3, 4 * sizeof(char));
+    currMB->b8pdir[0] = (char) offset2pdir8x16[mbtype][0];
+    currMB->b8pdir[1] = (char) offset2pdir8x16[mbtype][1];
+    currMB->b8pdir[2] = (char) offset2pdir8x16[mbtype][0];
+    currMB->b8pdir[3] = (char) offset2pdir8x16[mbtype][1];
+  }
+
+  currMB->mb_type = new_type;
+}
+/*!
+************************************************************************
+* \brief
+*    Interpret the mb mode for SI-Frames
+*    0 is SI 4x4 (also marks the macroblock in p_Vid->siblock), 1 is
+*    intra 4x4, 26 is IPCM and every other value is treated as intra
+*    16x16, with cbp and i16mode derived from mb_type - 2.
+*    The CBP table is static const, like in the other interpret_mb_mode_*
+*    functions, so it is not rebuilt on every call.
+************************************************************************
+*/
+static void interpret_mb_mode_SI(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  static const int ICBPTAB[6] = {0,16,32,15,31,47};
+  int mbmode = currMB->mb_type;
+
+  if (mbmode==0)
+  {
+    currMB->mb_type = SI4MB;
+    memset(&currMB->b8mode[0],IBLOCK,4 * sizeof(char));
+    memset(&currMB->b8pdir[0],-1,4 * sizeof(char));
+    p_Vid->siblock[currMB->mb_y][currMB->mb_x]=1;
+  }
+  else if (mbmode==1)
+  {
+    currMB->mb_type = I4MB;
+    memset(&currMB->b8mode[0],IBLOCK,4 * sizeof(char));
+    memset(&currMB->b8pdir[0],-1,4 * sizeof(char));
+  }
+  else if(mbmode==26)
+  {
+    currMB->mb_type=IPCM;
+    currMB->cbp= -1;
+    currMB->i16mode = 0;
+    memset(&currMB->b8mode[0],0,4 * sizeof(char));
+    memset(&currMB->b8pdir[0],-1,4 * sizeof(char));
+  }
+  else
+  {
+    // intra 16x16: the offset from 2 selects the CBP entry and the prediction mode
+    currMB->mb_type = I16MB;
+    currMB->cbp= ICBPTAB[(mbmode-2)>>2];
+    currMB->i16mode = (mbmode-2) & 0x03;
+    memset(&currMB->b8mode[0],0,4 * sizeof(char));
+    memset(&currMB->b8pdir[0],-1,4 * sizeof(char));
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Set mode interpretation based on slice type
+*
+*    Installs the per-slice function pointers: macroblock mode
+*    interpretation, motion info reading, macroblock reading and
+*    component decoding (chosen by slice type), the co-located
+*    computation (chosen by independent-plane mode and
+*    frame_mbs_only_flag) and the CBP/coefficient reader (chosen by
+*    the entropy coding mode of the active PPS).
+*
+* \param currSlice
+*    slice whose function pointers are filled in. For an unknown slice
+*    type or entropy mode a message is printed and the corresponding
+*    pointers are left untouched.
+************************************************************************
+*/
+void setup_slice_methods(Slice *currSlice)
+{
+  switch (currSlice->slice_type)
+  {
+  case P_SLICE:
+  case SP_SLICE:
+    // P and SP slices parse identically; only component decoding differs
+    currSlice->interpret_mb_mode         = interpret_mb_mode_P;
+    currSlice->read_motion_info_from_NAL = read_motion_info_from_NAL_p_slice;
+    currSlice->read_one_macroblock       = read_one_macroblock_p_slice;
+    if (currSlice->slice_type == SP_SLICE)
+      currSlice->decode_one_component = decode_one_component_sp_slice;
+    else
+      currSlice->decode_one_component = decode_one_component_p_slice;
+    break;
+
+  case B_SLICE:
+    currSlice->interpret_mb_mode         = interpret_mb_mode_B;
+    currSlice->read_motion_info_from_NAL = read_motion_info_from_NAL_b_slice;
+    currSlice->read_one_macroblock       = read_one_macroblock_b_slice;
+    currSlice->decode_one_component      = decode_one_component_b_slice;
+    break;
+
+  case I_SLICE:
+  case SI_SLICE:
+    // intra-only slices: no motion info, shared reader and decoder
+    if (currSlice->slice_type == SI_SLICE)
+      currSlice->interpret_mb_mode = interpret_mb_mode_SI;
+    else
+      currSlice->interpret_mb_mode = interpret_mb_mode_I;
+    currSlice->read_motion_info_from_NAL = NULL;
+    currSlice->read_one_macroblock       = read_one_macroblock_i_slice;
+    currSlice->decode_one_component      = decode_one_component_i_slice;
+    break;
+
+  default:
+    printf("Unsupported slice type\n");
+    break;
+  }
+
+  if (IS_INDEPENDENT(currSlice->p_Vid))
+    currSlice->compute_colocated = compute_colocated_JV;
+  else if (currSlice->active_sps->frame_mbs_only_flag)
+    currSlice->compute_colocated = compute_colocated;
+  else
+    currSlice->compute_colocated = compute_colocated_frames_mbs;
+
+  if (currSlice->p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
+    currSlice->read_CBP_and_coeffs_from_NAL = read_CBP_and_coeffs_from_NAL_CABAC;
+  else if (currSlice->p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+    currSlice->read_CBP_and_coeffs_from_NAL = read_CBP_and_coeffs_from_NAL_CAVLC;
+  else
+    printf("Unsupported entropy coding mode\n");
+}
+
+/*
+ * Fill the intra prediction mode entries of one macroblock with the
+ * value 2, BLOCK_SIZE rows of one 32-bit store each, starting at
+ * ipredmode[block_y][block_x].
+ *
+ * NOTE(review): the row step is PicWidthInMbs 32-bit words; this assumes
+ * ipredmode rows are contiguous and PicWidthInMbs*4 bytes wide, and that
+ * the start address is suitably aligned for int32_t -- confirm against
+ * the allocation of ipredmode.
+ */
+void macroblock_set_dc_pred(VideoParameters *p_Vid, int block_x, int block_y)
+{
+  const int32_t packed_modes = 0x02020202;   // mode 2 in each of the four bytes
+  const int words_per_row = p_Vid->PicWidthInMbs;
+  int32_t *first_row = (int32_t *)&p_Vid->ipredmode[block_y][block_x];
+  int row;
+
+  for (row = 0; row < BLOCK_SIZE; ++row)
+    first_row[row * words_per_row] = packed_modes;
+}
+/*!
+************************************************************************
+* \brief
+* init macroblock I and P frames
+************************************************************************
+*/
+#ifdef _M_IX86
+/*
+ * x86 (MMX) variant: reset the intra prediction modes and, for both
+ * reference lists, the motion entries of the 4x4 blocks covered by the
+ * current macroblock.
+ *
+ * Each entry gets ref_pic_id = UNDEFINED_REFERENCE and one 64-bit store
+ * of {0, -1} starting at its mv field.
+ * NOTE(review): the 64-bit store presumably clears mv and sets the field
+ * that follows it (ref_idx) to -1 -- confirm against the PicMotion layout.
+ * NOTE(review): no _mm_empty() is issued here after the MMX stores --
+ * confirm the MMX state is cleared elsewhere before x87 code runs.
+ */
+static void init_macroblock(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int first_col = currMB->block_x;
+  const int first_row = currMB->block_y;
+  PicMotionParams *motion = &p_Vid->dec_picture->motion;
+  PicMotion **per_list[2];
+  const __m64 zero_then_minus_one = _mm_setr_pi32(0, -1);
+  int list, row, col;
+
+  macroblock_set_dc_pred(p_Vid, first_col, first_row);
+
+  // reset vectors and pred. modes
+  per_list[0] = motion->motion[LIST_0];
+  per_list[1] = motion->motion[LIST_1];
+
+  for (list = 0; list < 2; ++list)
+  {
+    for (row = 0; row < BLOCK_SIZE; ++row)
+    {
+      PicMotion *entry = &per_list[list][first_row + row][first_col];
+
+      for (col = 0; col < 4; ++col)
+      {
+        entry[col].ref_pic_id = UNDEFINED_REFERENCE;
+        *(__m64 *)&entry[col].mv = zero_then_minus_one;
+      }
+    }
+  }
+}
+
+
+#else
+/*
+ * Portable variant: reset the intra prediction modes and, for both
+ * reference lists, the motion entries of the BLOCK_SIZE x BLOCK_SIZE
+ * 4x4 blocks covered by the current macroblock.
+ *
+ * Each entry is set to: ref_pic_id = UNDEFINED_REFERENCE, mv = 0,
+ * ref_idx = -1.
+ *
+ * The column count uses BLOCK_SIZE (the original hard-coded four
+ * entries per row), matching the loops in skip_macroblock.
+ */
+static void init_macroblock(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int block_x = currMB->block_x, block_y = currMB->block_y;
+  PicMotionParams *motion = &p_Vid->dec_picture->motion;
+  PicMotion **list_motion[2];
+  int list, j, k;
+
+  macroblock_set_dc_pred(p_Vid, block_x, block_y);
+
+  // reset vectors and pred. modes
+  list_motion[0] = motion->motion[LIST_0];
+  list_motion[1] = motion->motion[LIST_1];
+
+  for (list = 0; list < 2; ++list)
+  {
+    for (j = 0; j < BLOCK_SIZE; ++j)
+    {
+      PicMotion *block = &list_motion[list][block_y + j][block_x];
+
+      for (k = 0; k < BLOCK_SIZE; ++k)
+      {
+        block[k].ref_pic_id = UNDEFINED_REFERENCE;
+        memset(block[k].mv, 0, sizeof(MotionVector));
+        block[k].ref_idx = -1;
+      }
+    }
+  }
+}
+
+
+#endif
+/*!
+************************************************************************
+* \brief
+*    Sets mode for 8x8 block
+*
+*    Maps a sub-macroblock type code to the internal 8x8 block mode and
+*    prediction direction, using the B-slice tables when the slice is a
+*    B slice and the P-slice tables otherwise.
+*
+* \param currMB
+*    macroblock whose b8mode[i] / b8pdir[i] are written
+* \param value
+*    sub-macroblock type code as read from the bitstream. A code outside
+*    the selected table (negative, or >= 5 for P / >= 14 for B) is
+*    treated as code 0 instead of indexing past the table.
+* \param i
+*    index of the 8x8 block (callers in this file pass 0..3)
+************************************************************************
+*/
+void SetB8Mode (Macroblock* currMB, int value, int i)
+{
+  static const char p_v2b8 [ 5] = {4, 5, 6, 7, IBLOCK};
+  static const char p_v2pd [ 5] = {0, 0, 0, 0, -1};
+  static const char b_v2b8 [14] = {0, 4, 4, 4, 5, 6, 5, 6, 5, 6, 7, 7, 7, IBLOCK};
+  static const char b_v2pd [14] = {2, 0, 1, 2, 0, 0, 1, 1, 2, 2, 0, 1, 2, -1};
+
+  const int   is_b_slice = (currMB->p_Slice->slice_type == B_SLICE);
+  const char *v2b8       = is_b_slice ? b_v2b8 : p_v2b8;
+  const char *v2pd       = is_b_slice ? b_v2pd : p_v2pd;
+  const int   entries    = is_b_slice ? (int) sizeof(b_v2b8) : (int) sizeof(p_v2b8);
+
+  // value comes straight from the bitstream (unbounded ue(v) in the CAVLC
+  // path); a corrupt stream must not read outside the lookup tables
+  if (value < 0 || value >= entries)
+    value = 0;
+
+  currMB->b8mode[i] = v2b8[value];
+  currMB->b8pdir[i] = v2pd[value];
+}
+
+
+/*
+ * Prepare coefficient state for the current macroblock.
+ *
+ * The coefficient buffers themselves are not cleared here; in _DEBUG
+ * builds the first 256 entries of each of the three cof planes are
+ * checked and DebugBreak() is raised on any non-zero value. For CAVLC
+ * streams the nz_coeff entry of the current macroblock is zeroed.
+ */
+void reset_coeffs(Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+
+#ifdef _DEBUG
+  {
+    int plane;
+    for (plane = 0; plane < 3; plane++)
+    {
+      short *coef = &currSlice->cof[plane][0][0];
+      int idx;
+      for (idx = 0; idx < 256; idx++)
+      {
+        if (coef[idx] != 0)
+          DebugBreak();
+      }
+    }
+  }
+#endif
+
+  // benski> clearing currSlice->cof here is believed to be unnecessary;
+  // the debug check above exists to verify that
+
+  // CAVLC
+  if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+    memzero48(p_Vid->nz_coeff[p_Vid->current_mb_nr]);
+}
+
+/*
+ * Infer mb_field for currMB from its neighbours: take the left
+ * neighbour's flag when it is available, otherwise the upper
+ * neighbour's, otherwise FALSE.
+ */
+void field_flag_inference(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  if (currMB->mb_avail_left)
+  {
+    currMB->mb_field = p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+    return;
+  }
+
+  // check top macroblock pair
+  if (currMB->mb_avail_up)
+  {
+    currMB->mb_field = p_Vid->mb_data[currMB->mb_addr_up].mb_field;
+    return;
+  }
+
+  currMB->mb_field = FALSE;
+}
+
+
+/*
+ * Fill in the LIST_0 motion data of a skipped macroblock.
+ *
+ * The left (index 0) and upper (index 1) neighbours returned by
+ * get_neighbors0016 are inspected; their vertical mv and ref_idx are
+ * rescaled when the neighbour's field/frame coding differs from the
+ * current macroblock's. If either neighbour is unavailable or has
+ * ref_idx 0 with a zero vector, every 4x4 block of the macroblock gets a
+ * zero vector; otherwise it gets the vector returned by GetMVPredictor.
+ * In both cases ref_idx is 0, ref_pic_id is taken from ref_pic_num, cbp
+ * is cleared and reset_coeffs is called.
+ */
+static void skip_macroblock(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  PicMotionParams *motion = &dec_picture->motion;
+  PixelPos neighbor[4];          // neighbor blocks
+  short pred_mv[2];
+  short *left_mv = NULL;
+  short *up_mv = NULL;
+  int left_mv_y = 0, left_ref_idx = 0;
+  int up_mv_y = 0, up_ref_idx = 0;
+  int left_is_zero, up_is_zero, use_zero_mv;
+  int list_offset = 0;
+  int row, col;
+  const int first_row = currMB->block_y;
+
+  if (currSlice->mb_aff_frame_flag && currMB->mb_field)
+    list_offset = (currMB->mbAddrX & 0x01) ? 4 : 2;
+
+  get_neighbors0016(currMB, neighbor);
+
+  if (neighbor[0].available)
+  {
+    PicMotion *left = &motion->motion[LIST_0][neighbor[0].pos_y][neighbor[0].pos_x];
+
+    left_mv      = left->mv;
+    left_mv_y    = left_mv[1];
+    left_ref_idx = left->ref_idx;
+
+    if (currMB->mb_field && !p_Vid->mb_data[neighbor[0].mb_addr].mb_field)
+    {
+      // field MB, frame neighbour
+      left_mv_y    /= 2;
+      left_ref_idx *= 2;
+    }
+    else if (!currMB->mb_field && p_Vid->mb_data[neighbor[0].mb_addr].mb_field)
+    {
+      // frame MB, field neighbour
+      left_mv_y    *= 2;
+      left_ref_idx >>= 1;
+    }
+  }
+
+  if (neighbor[1].available)
+  {
+    PicMotion *up = &motion->motion[LIST_0][neighbor[1].pos_y][neighbor[1].pos_x];
+
+    up_mv      = up->mv;
+    up_mv_y    = up_mv[1];
+    up_ref_idx = up->ref_idx;
+
+    if (currMB->mb_field && !p_Vid->mb_data[neighbor[1].mb_addr].mb_field)
+    {
+      up_mv_y    /= 2;
+      up_ref_idx *= 2;
+    }
+    else if (!currMB->mb_field && p_Vid->mb_data[neighbor[1].mb_addr].mb_field)
+    {
+      up_mv_y    *= 2;
+      up_ref_idx >>= 1;
+    }
+  }
+
+  // an unavailable neighbour counts as zero motion
+  left_is_zero = !neighbor[0].available || (left_ref_idx == 0 && left_mv[0] == 0 && left_mv_y == 0);
+  up_is_zero   = !neighbor[1].available || (up_ref_idx == 0 && up_mv[0] == 0 && up_mv_y == 0);
+
+  currMB->cbp = 0;
+  reset_coeffs(currSlice);
+
+  use_zero_mv = up_is_zero || left_is_zero;
+  if (!use_zero_mv)
+    currMB->GetMVPredictor (currMB, neighbor, pred_mv, 0, motion->motion[LIST_0], 0, 0, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
+
+  for (row = first_row; row < first_row + BLOCK_SIZE; ++row)
+  {
+    for (col = currMB->block_x; col < currMB->block_x + BLOCK_SIZE; ++col)
+    {
+      PicMotion *entry = &motion->motion[LIST_0][row][col];
+
+      if (use_zero_mv)
+        memset(&entry->mv, 0, sizeof(MotionVector));
+      else
+        memcpy(&entry->mv, pred_mv, sizeof(MotionVector));
+
+      entry->ref_idx    = 0;
+      entry->ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][0];
+    }
+  }
+}
+
+/*
+ * Conceal the IPCM samples of a macroblock by filling currSlice->ipcm
+ * with the per-component DC prediction value.
+ *
+ * Plane 0 (luma) is MB_BLOCK_SIZE x MB_BLOCK_SIZE. Planes 1 and 2
+ * (chroma) are mb_cr_size_y x mb_cr_size_x and are only written when the
+ * picture has chroma and the colour planes are not coded independently.
+ *
+ * The chroma loop previously ran over planes 0 and 1, which overwrote
+ * part of the luma plane and left plane 2 untouched; it now covers
+ * planes 1 and 2, the planes read_IPCM_coeffs_from_NAL stores U and V in.
+ * NOTE(review): assumes dc_pred_value_comp has an entry for plane 2 --
+ * confirm against its declaration.
+ */
+static void concealIPCMcoeffs(Macroblock *currMB)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  int i, j, k;
+
+  // luma
+  for(i=0;i<MB_BLOCK_SIZE;++i)
+  {
+    for(j=0;j<MB_BLOCK_SIZE;++j)
+    {
+      currSlice->ipcm[0][i][j] = p_Vid->dc_pred_value_comp[0];
+    }
+  }
+
+  // chroma (u = plane 1, v = plane 2)
+  if ((dec_picture->chroma_format_idc != YUV400) && !IS_INDEPENDENT(p_Vid))
+  {
+    for (k = 1; k <= 2; ++k)
+    {
+      for(i=0;i<p_Vid->mb_cr_size_y;++i)
+      {
+        for(j=0;j<p_Vid->mb_cr_size_x;++j)
+        {
+          currSlice->ipcm[k][i][j] = p_Vid->dc_pred_value_comp[k];
+        }
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+* Get the syntax elements from the NAL
+*
+* Macroblock reader installed by setup_slice_methods for I and SI
+* slices. In order it reads: the optional mb_field_decoding_flag
+* (MBAFF, even-addressed macroblocks only), the mb_type, the optional
+* transform_size_8x8_flag, and then either the intra prediction modes
+* followed by CBP/coefficients, or the IPCM samples.
+*
+* \param currMB
+* macroblock to fill in; its p_Slice and p_Vid give the decoding context
+************************************************************************
+*/
+static void read_one_macroblock_i_slice(Macroblock *currMB)
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ SyntaxElement currSE;
+ int mb_nr = currMB->mbAddrX;
+
+ DataPartition *dP;
+ const byte *partMap = assignSE2partition[currSlice->dp_mode];
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ PicMotionParams *motion = &dec_picture->motion;
+
+ currMB->mb_field = ((mb_nr&0x01) == 0)? FALSE : p_Vid->mb_data[mb_nr-1].mb_field; // odd address: inherit from the preceding MB (mb_nr-1)
+
+ update_qp(currMB, p_Vid->qp);
+
+ // read MB mode *****************************************************************
+ dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ currSE.mapping = linfo_ue; // currSE is only used on the CAVLC path below
+
+ // read MB aff
+ if (currSlice->mb_aff_frame_flag && (mb_nr&0x01)==0) // flag is read for even-addressed MBs only
+ {
+ TRACE_STRING("mb_field_decoding_flag");
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ {
+ currMB->mb_field = readSyntaxElement_FLC(dP->bitstream, 1);
+ }
+ else
+ {
+ currMB->mb_field = readFieldModeInfo_CABAC(currMB, &dP->de_cabac);
+ }
+ }
+
+ if(p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
+ {
+ CheckAvailabilityOfNeighborsCABAC(currMB);
+
+ // read MB type
+ currMB->mb_type = readMB_typeInfo_CABAC(currMB, &dP->de_cabac);
+ }
+ else
+ { // CAVLC
+ // read MB type
+ readSyntaxElement_UVLC(&currSE, dP);
+ currMB->mb_type = currSE.value1;
+ }
+
+
+
+ currMB->ei_flag = 0;
+
+ motion->mb_field[mb_nr] = (byte) currMB->mb_field;
+
+ currMB->block_y_aff = ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) ? (mb_nr&0x01) ? (currMB->block_y - 4)>>1 : currMB->block_y >> 1 : currMB->block_y; // MBAFF field MB: halved row, bottom MB first shifted up by 4
+
+ p_Vid->siblock[currMB->mb_y][currMB->mb_x] = 0; // cleared before mode interpretation; interpret_mb_mode_SI sets it for SI4MB
+
+ currSlice->interpret_mb_mode(currMB); // converts the raw mb_type code into the internal mode
+
+ //init NoMbPartLessThan8x8Flag
+ currMB->NoMbPartLessThan8x8Flag = TRUE;
+
+ //============= Transform Size Flag for INTRA MBs =============
+ //-------------------------------------------------------------
+ //transform size flag for INTRA_4x4 and INTRA_8x8 modes
+ if (currMB->mb_type == I4MB && p_Vid->Transform8x8Mode)
+ {
+ dP = &(currSlice->partArr[partMap[SE_HEADER]]);
+ TRACE_STRING("transform_size_8x8_flag");
+
+ // read CAVLC transform_size_8x8_flag
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ {
+ currMB->luma_transform_size_8x8_flag = readSyntaxElement_FLC(dP->bitstream, 1);
+ }
+ else
+ {
+ currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &dP->de_cabac);
+ }
+
+ if (currMB->luma_transform_size_8x8_flag)
+ {
+ currMB->mb_type = I8MB; // 8x8 transform selected: I4MB becomes I8MB
+ memset(&currMB->b8mode, I8MB, 4 * sizeof(char));
+ memset(&currMB->b8pdir, -1, 4 * sizeof(char));
+ }
+ }
+ else
+ {
+ currMB->luma_transform_size_8x8_flag = FALSE;
+ }
+
+ //--- init macroblock data ---
+ init_macroblock(currMB);
+
+ if(currMB->mb_type != IPCM)
+ {
+ // intra prediction modes for a macroblock 4x4 **********************************************
+ read_ipred_modes(currMB);
+
+ // read CBP and Coeffs ***************************************************************
+ currSlice->read_CBP_and_coeffs_from_NAL (currMB);
+ }
+ else
+ {
+ //read pcm_alignment_zero_bit and pcm_byte[i]
+
+ // here dP is assigned with the same dP as SE_MBTYPE, because IPCM syntax is in the
+ // same category as MBTYPE
+ if ( currSlice->dp_mode && currSlice->dpB_NotPresent ) // data partitioning in use and partition B missing: conceal instead of reading
+ {
+ concealIPCMcoeffs(currMB);
+ }
+ else
+ {
+ dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
+ read_IPCM_coeffs_from_NAL(currSlice, dP);
+ }
+ }
+
+ return;
+}
+
+/*!
+************************************************************************
+* \brief
+* Get the syntax elements from the NAL
+*
+* Macroblock reader installed by setup_slice_methods for P and SP
+* slices. It determines skip state (mb_skip_flag for CABAC, the
+* mb_skip_run counter p_Vid->cod_counter for CAVLC), reads or infers
+* the MBAFF field flag, reads mb_type and, for P8x8, the four
+* sub-macroblock types, then the optional transform_size_8x8_flag.
+* Finally it dispatches on the resulting mode: skipped macroblocks get
+* predicted motion, IPCM macroblocks get raw samples, everything else
+* gets intra modes and/or motion info followed by CBP/coefficients.
+*
+* \param currMB
+* macroblock to fill in; its p_Slice and p_Vid give the decoding context
+************************************************************************
+*/
+static void read_one_macroblock_p_slice(Macroblock *currMB)
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ int i;
+
+ SyntaxElement currSE;
+ int mb_nr = currMB->mbAddrX;
+
+ DataPartition *dP;
+ const byte *partMap = assignSE2partition[currSlice->dp_mode];
+ Macroblock *topMB = NULL;
+ int prevMbSkipped = 0;
+ int check_bottom, read_bottom, read_top;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ PicMotionParams *motion = &dec_picture->motion;
+
+ if (currSlice->mb_aff_frame_flag)
+ {
+ if (mb_nr&0x01) // odd address: the preceding MB (mb_nr-1) is the top MB of the pair
+ {
+ topMB= &p_Vid->mb_data[mb_nr-1];
+ prevMbSkipped = (topMB->mb_type == 0); // mb_type 0 is what the skip paths below assign
+ }
+ else
+ prevMbSkipped = 0;
+ }
+
+ currMB->mb_field = ((mb_nr&0x01) == 0)? FALSE : p_Vid->mb_data[mb_nr-1].mb_field; // odd address: inherit from the preceding MB
+
+ update_qp(currMB, p_Vid->qp);
+
+ // read MB mode *****************************************************************
+ dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ currSE.mapping = linfo_ue;
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
+ {
+ int skip;
+ // read MB skip_flag
+ if (currSlice->mb_aff_frame_flag && ((mb_nr&0x01) == 0||prevMbSkipped))
+ field_flag_inference(currMB); // provisional mb_field from neighbours, set before the skip flag is decoded
+
+ CheckAvailabilityOfNeighborsCABAC(currMB);
+ TRACE_STRING("mb_skip_flag");
+ skip = readMB_skip_flagInfo_CABAC(currMB, &dP->de_cabac);
+
+ currMB->mb_type = !skip; // 0 = skipped; non-zero is replaced by the real mb_type below
+ currMB->skip_flag = skip;
+
+ currMB->ei_flag = 0;
+
+ // read MB AFF
+ if (currSlice->mb_aff_frame_flag)
+ {
+ check_bottom=read_bottom=read_top=0;
+ if ((mb_nr&0x01)==0)
+ {
+ check_bottom = currMB->skip_flag; // top MB skipped: field mode is obtained via the next (bottom) MB
+ read_top = !check_bottom;
+ }
+ else
+ {
+ read_bottom = (topMB->skip_flag && (!currMB->skip_flag)); // bottom MB reads the flag only if the top was skipped and it is not
+ }
+
+ if (read_bottom || read_top)
+ {
+ TRACE_STRING("mb_field_decoding_flag");
+ currMB->mb_field = readFieldModeInfo_CABAC(currMB, &dP->de_cabac);
+ }
+ if (check_bottom)
+ check_next_mb_and_get_field_mode_CABAC(currSlice, dP);
+
+ CheckAvailabilityOfNeighborsCABAC(currMB); // repeated after mb_field may have changed
+ }
+
+ // read MB type
+ if (currMB->mb_type != 0 )
+ {
+ TRACE_STRING("mb_type");
+ currMB->mb_type = readMB_typeInfo_CABAC(currMB, &dP->de_cabac);
+ currMB->ei_flag = 0;
+ }
+ }
+ // VLC Non-Intra
+ else
+ {
+ if(p_Vid->cod_counter == -1) // -1: no skip run pending, read a new mb_skip_run
+ {
+ TRACE_STRING("mb_skip_run");
+ readSyntaxElement_UVLC(&currSE, dP);
+ p_Vid->cod_counter = currSE.value1;
+ }
+ if (p_Vid->cod_counter==0) // run exhausted: this MB is coded
+ {
+ // read MB aff
+ if ((currSlice->mb_aff_frame_flag) && (((mb_nr&0x01)==0) || ((mb_nr&0x01) && prevMbSkipped)))
+ {
+ TRACE_STRING("mb_field_decoding_flag");
+ currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
+ }
+
+ // read MB type
+ TRACE_STRING("mb_type");
+ readSyntaxElement_UVLC(&currSE, dP);
+ if(currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE)
+ ++(currSE.value1); // shift by one: mb_type 0 is reserved for the skip path below
+ currMB->mb_type = currSE.value1;
+ currMB->ei_flag = 0;
+ p_Vid->cod_counter--; // back to -1 so the next MB reads a new run
+ currMB->skip_flag = 0;
+ }
+ else
+ {
+ p_Vid->cod_counter--;
+ currMB->mb_type = 0;
+ currMB->ei_flag = 0;
+ currMB->skip_flag = 1;
+
+ // read field flag of bottom block
+ if(currSlice->mb_aff_frame_flag)
+ {
+ if(p_Vid->cod_counter == 0 && ((mb_nr&0x01) == 0))
+ {
+ TRACE_STRING("mb_field_decoding_flag (of coded bottom mb)");
+ currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
+ dP->bitstream->frame_bitoffset--; // undo the one-bit read: the flag is only peeked here
+ TRACE_DECBITS(1);
+ }
+ else if (p_Vid->cod_counter > 0 && ((mb_nr & 0x01) == 0))
+ {
+ // check left macroblock pair first
+ if (mb_is_available(mb_nr - 2, currMB) && ((mb_nr % (p_Vid->PicWidthInMbs * 2))!=0))
+ {
+ currMB->mb_field = p_Vid->mb_data[mb_nr-2].mb_field;
+ }
+ else
+ {
+ // check top macroblock pair
+ if (mb_is_available(mb_nr - 2*p_Vid->PicWidthInMbs, currMB))
+ {
+ currMB->mb_field = p_Vid->mb_data[mb_nr-2*p_Vid->PicWidthInMbs].mb_field;
+ }
+ else
+ currMB->mb_field = FALSE;
+ }
+ }
+ }
+ }
+ }
+
+ motion->mb_field[mb_nr] = (byte) currMB->mb_field;
+
+ currMB->block_y_aff = ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) ? (mb_nr&0x01) ? (currMB->block_y - 4)>>1 : currMB->block_y >> 1 : currMB->block_y; // MBAFF field MB: halved row, bottom MB first shifted up by 4
+
+ p_Vid->siblock[currMB->mb_y][currMB->mb_x] = 0;
+
+ currSlice->interpret_mb_mode(currMB); // converts the raw mb_type code into the internal mode
+
+ if(currSlice->mb_aff_frame_flag)
+ {
+ if(currMB->mb_field)
+ {
+ currSlice->num_ref_idx_l0_active <<=1; // NOTE(review): doubled for field MBs; no matching restore is visible in this function -- confirm it happens elsewhere
+ currSlice->num_ref_idx_l1_active <<=1;
+ }
+ }
+
+ //init NoMbPartLessThan8x8Flag
+ currMB->NoMbPartLessThan8x8Flag = (IS_DIRECT(currMB) && !(p_Vid->active_sps->direct_8x8_inference_flag))? FALSE: TRUE;
+
+ //====== READ 8x8 SUB-PARTITION MODES (modes of 8x8 blocks) and Intra VBST block modes ======
+ if (currMB->mb_type == P8x8)
+ {
+ dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag ==CAVLC)
+ {
+ currSE.mapping = linfo_ue;
+ for (i = 0; i < 4; ++i)
+ {
+ TRACE_STRING("sub_mb_type");
+ readSyntaxElement_UVLC(&currSE, dP);
+ SetB8Mode (currMB, currSE.value1, i);
+
+ //set NoMbPartLessThan8x8Flag for P8x8 mode
+ currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
+ (currMB->b8mode[i]==4);
+ }
+ }
+ else
+ {
+ for (i = 0; i < 4; ++i)
+ {
+ int value = readB8_typeInfo_CABAC(currSlice, &dP->de_cabac);
+ SetB8Mode (currMB, value, i);
+
+ //set NoMbPartLessThan8x8Flag for P8x8 mode
+ currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
+ (currMB->b8mode[i]==4);
+ }
+ }
+
+ //--- init macroblock data ---
+ init_macroblock (currMB);
+ currSlice->read_motion_info_from_NAL (currMB); // P8x8 motion is read here, right after the sub-types; the later generic path skips P8x8
+ }
+
+ //============= Transform Size Flag for INTRA MBs =============
+ //-------------------------------------------------------------
+ //transform size flag for INTRA_4x4 and INTRA_8x8 modes
+ if (currMB->mb_type == I4MB && p_Vid->Transform8x8Mode)
+ {
+ dP = &(currSlice->partArr[partMap[SE_HEADER]]);
+ TRACE_STRING("transform_size_8x8_flag");
+
+ // read CAVLC transform_size_8x8_flag
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ {
+ currMB->luma_transform_size_8x8_flag = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
+ }
+ else
+ {
+ currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &dP->de_cabac);
+ }
+
+ if (currMB->luma_transform_size_8x8_flag)
+ {
+ currMB->mb_type = I8MB; // 8x8 transform selected: I4MB becomes I8MB
+ memset(&currMB->b8mode, I8MB, 4 * sizeof(char));
+ memset(&currMB->b8pdir, -1, 4 * sizeof(char));
+ }
+ }
+ else
+ {
+ currMB->luma_transform_size_8x8_flag = FALSE;
+ }
+
+ if(p_Vid->active_pps->constrained_intra_pred_flag)
+ {
+ if( !IS_INTRA(currMB) )
+ {
+ p_Vid->intra_block[mb_nr] = 0; // record that this MB is not intra coded
+ }
+ }
+
+ //--- init macroblock data ---
+ if (currMB->mb_type != P8x8) // P8x8 was already initialised above, before its motion info was read
+ init_macroblock(currMB);
+
+ if (IS_SKIP (currMB)) //keep last macroblock
+ {
+ skip_macroblock(currMB);
+ }
+ else if(currMB->mb_type != IPCM)
+ {
+ // intra prediction modes for a macroblock 4x4 **********************************************
+ if (IS_INTRA(currMB))
+ read_ipred_modes(currMB);
+
+ // read inter frame vector data *********************************************************
+ if (IS_INTERMV (currMB) && (currMB->mb_type != P8x8))
+ {
+ currSlice->read_motion_info_from_NAL (currMB);
+ }
+ // read CBP and Coeffs ***************************************************************
+ currSlice->read_CBP_and_coeffs_from_NAL (currMB);
+ }
+ else
+ {
+ //read pcm_alignment_zero_bit and pcm_byte[i]
+
+ // here dP is assigned with the same dP as SE_MBTYPE, because IPCM syntax is in the
+ // same category as MBTYPE
+ if ( currSlice->dp_mode && currSlice->dpB_NotPresent ) // data partitioning in use and partition B missing: conceal instead of reading
+ {
+ concealIPCMcoeffs(currMB);
+ }
+ else
+ {
+ dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
+ read_IPCM_coeffs_from_NAL(currSlice, dP);
+ }
+ }
+
+ return;
+}
+
+/*!
+************************************************************************
+* \brief
+* Get the syntax elements from the NAL
+*
+* Macroblock reader installed by setup_slice_methods for B slices.
+* It follows the same sequence as the P-slice reader (skip state,
+* MBAFF field flag, mb_type, sub-macroblock types for P8x8, optional
+* transform_size_8x8_flag) with B-specific handling: the top
+* macroblock's skip_flag (not its mb_type) decides prevMbSkipped, a
+* CABAC skip also sets cbp and cod_counter, and a direct macroblock
+* with cod_counter >= 0 is finished without reading motion or
+* coefficients.
+*
+* \param currMB
+* macroblock to fill in; its p_Slice and p_Vid give the decoding context
+************************************************************************
+*/
+static void read_one_macroblock_b_slice(Macroblock *currMB)
+{
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int i;
+
+ SyntaxElement currSE;
+ int mb_nr = currMB->mbAddrX;
+
+ DataPartition *dP;
+ const byte *partMap = assignSE2partition[currSlice->dp_mode];
+ Macroblock *topMB = NULL;
+ int prevMbSkipped = 0;
+ int check_bottom, read_bottom, read_top;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ PicMotionParams *motion = &dec_picture->motion;
+
+ if (currSlice->mb_aff_frame_flag)
+ {
+ if (mb_nr&0x01) // odd address: the preceding MB (mb_nr-1) is the top MB of the pair
+ {
+ topMB= &p_Vid->mb_data[mb_nr-1];
+ prevMbSkipped = topMB->skip_flag; // uses skip_flag here, unlike the P-slice reader which tests mb_type == 0
+ }
+ else
+ prevMbSkipped = 0;
+ }
+
+ currMB->mb_field = ((mb_nr&0x01) == 0)? FALSE : p_Vid->mb_data[mb_nr-1].mb_field; // odd address: inherit from the preceding MB
+
+ update_qp(currMB, p_Vid->qp);
+
+ // read MB mode *****************************************************************
+ dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ currSE.mapping = linfo_ue;
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
+ {
+ // read MB skip_flag
+ int skip;
+ if (currSlice->mb_aff_frame_flag && ((mb_nr&0x01) == 0||prevMbSkipped))
+ field_flag_inference(currMB); // provisional mb_field from neighbours, set before the skip flag is decoded
+
+ CheckAvailabilityOfNeighborsCABAC(currMB);
+ TRACE_STRING("mb_skip_flag");
+ skip = readMB_skip_flagInfo_CABAC(currMB, &dP->de_cabac);
+
+ currMB->mb_type = !skip; // 0 = skipped; non-zero is replaced by the real mb_type below
+ currMB->skip_flag = skip;
+
+ currMB->cbp = !skip;
+
+ currMB->ei_flag = 0;
+
+ if (skip)
+ p_Vid->cod_counter=0; // makes the "IS_DIRECT && cod_counter >= 0" test further down true for a skipped MB
+
+ // read MB AFF
+ if (currSlice->mb_aff_frame_flag)
+ {
+ check_bottom=read_bottom=read_top=0;
+ if ((mb_nr&0x01)==0)
+ {
+ check_bottom = currMB->skip_flag; // top MB skipped: field mode is obtained via the next (bottom) MB
+ read_top = !check_bottom;
+ }
+ else
+ {
+ read_bottom = (topMB->skip_flag && (!currMB->skip_flag)); // bottom MB reads the flag only if the top was skipped and it is not
+ }
+
+ if (read_bottom || read_top)
+ {
+ TRACE_STRING("mb_field_decoding_flag");
+ currMB->mb_field = readFieldModeInfo_CABAC(currMB, &dP->de_cabac);
+ }
+ if (check_bottom)
+ check_next_mb_and_get_field_mode_CABAC(currSlice,dP);
+
+ CheckAvailabilityOfNeighborsCABAC(currMB); // repeated after mb_field may have changed
+ }
+
+ // read MB type
+ if (currMB->mb_type != 0 )
+ {
+ TRACE_STRING("mb_type");
+ currMB->mb_type = readMB_typeInfo_CABAC(currMB, &dP->de_cabac);
+ currMB->ei_flag = 0;
+ }
+ }
+ // VLC Non-Intra
+ else
+ {
+ if(p_Vid->cod_counter == -1) // -1: no skip run pending, read a new mb_skip_run
+ {
+ TRACE_STRING("mb_skip_run");
+ readSyntaxElement_UVLC(&currSE, dP);
+ p_Vid->cod_counter = currSE.value1;
+ }
+ if (p_Vid->cod_counter==0) // run exhausted: this MB is coded
+ {
+ // read MB aff
+ if ((currSlice->mb_aff_frame_flag) && (((mb_nr&0x01)==0) || ((mb_nr&0x01) && prevMbSkipped)))
+ {
+ TRACE_STRING("mb_field_decoding_flag");
+ currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
+ }
+
+ // read MB type
+ TRACE_STRING("mb_type");
+ readSyntaxElement_UVLC(&currSE, dP);
+ if(currSlice->slice_type == P_SLICE || currSlice->slice_type == SP_SLICE) // NOTE(review): setup_slice_methods installs this reader for B_SLICE only, so this looks never true here -- confirm
+ ++(currSE.value1);
+ currMB->mb_type = currSE.value1;
+ currMB->ei_flag = 0;
+ p_Vid->cod_counter--; // back to -1 so the next MB reads a new run
+ currMB->skip_flag = 0;
+ }
+ else
+ {
+ p_Vid->cod_counter--;
+ currMB->mb_type = 0;
+ currMB->ei_flag = 0;
+ currMB->skip_flag = 1;
+
+ // read field flag of bottom block
+ if(currSlice->mb_aff_frame_flag)
+ {
+ if(p_Vid->cod_counter == 0 && ((mb_nr&0x01) == 0))
+ {
+ TRACE_STRING("mb_field_decoding_flag (of coded bottom mb)");
+ currMB->mb_field = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
+ dP->bitstream->frame_bitoffset--; // undo the one-bit read: the flag is only peeked here
+ TRACE_DECBITS(1);
+ }
+ else if (p_Vid->cod_counter > 0 && ((mb_nr & 0x01) == 0))
+ {
+ // check left macroblock pair first
+ if (mb_is_available(mb_nr - 2, currMB) && ((mb_nr % (p_Vid->PicWidthInMbs * 2))!=0))
+ {
+ currMB->mb_field = p_Vid->mb_data[mb_nr-2].mb_field;
+ }
+ else
+ {
+ // check top macroblock pair
+ if (mb_is_available(mb_nr - 2*p_Vid->PicWidthInMbs, currMB))
+ {
+ currMB->mb_field = p_Vid->mb_data[mb_nr-2*p_Vid->PicWidthInMbs].mb_field;
+ }
+ else
+ currMB->mb_field = FALSE;
+ }
+ }
+ }
+ }
+ }
+
+ motion->mb_field[mb_nr] = (byte) currMB->mb_field;
+
+ currMB->block_y_aff = ((currSlice->mb_aff_frame_flag) && (currMB->mb_field)) ? (mb_nr&0x01) ? (currMB->block_y - 4)>>1 : currMB->block_y >> 1 : currMB->block_y; // MBAFF field MB: halved row, bottom MB first shifted up by 4
+
+ p_Vid->siblock[currMB->mb_y][currMB->mb_x] = 0;
+
+ currSlice->interpret_mb_mode(currMB); // converts the raw mb_type code into the internal mode
+
+ if(currSlice->mb_aff_frame_flag)
+ {
+ if(currMB->mb_field)
+ {
+ currSlice->num_ref_idx_l0_active <<=1; // NOTE(review): doubled for field MBs; no matching restore is visible in this function -- confirm it happens elsewhere
+ currSlice->num_ref_idx_l1_active <<=1;
+ }
+ }
+
+ //init NoMbPartLessThan8x8Flag
+ currMB->NoMbPartLessThan8x8Flag = (IS_DIRECT(currMB) && !(p_Vid->active_sps->direct_8x8_inference_flag))? FALSE: TRUE;
+
+ //====== READ 8x8 SUB-PARTITION MODES (modes of 8x8 blocks) and Intra VBST block modes ======
+ if (currMB->mb_type == P8x8)
+ {
+ dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag ==CAVLC)
+ {
+ currSE.mapping = linfo_ue;
+ for (i = 0; i < 4; ++i)
+ {
+ TRACE_STRING("sub_mb_type");
+ readSyntaxElement_UVLC(&currSE, dP);
+ SetB8Mode (currMB, currSE.value1, i);
+
+ //set NoMbPartLessThan8x8Flag for P8x8 mode
+ currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
+ (currMB->b8mode[i]==4);
+ }
+ }
+ else
+ {
+ for (i = 0; i < 4; ++i)
+ {
+ int value = readB8_typeInfo_CABAC(currSlice, &dP->de_cabac);
+ SetB8Mode (currMB, value, i);
+
+ //set NoMbPartLessThan8x8Flag for P8x8 mode
+ currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && p_Vid->active_sps->direct_8x8_inference_flag) ||
+ (currMB->b8mode[i]==4);
+ }
+ }
+
+ //--- init macroblock data ---
+ init_macroblock (currMB);
+ currSlice->read_motion_info_from_NAL (currMB); // P8x8 motion is read here, right after the sub-types; the later generic path skips P8x8
+ }
+
+ //============= Transform Size Flag for INTRA MBs =============
+ //-------------------------------------------------------------
+ //transform size flag for INTRA_4x4 and INTRA_8x8 modes
+ if (currMB->mb_type == I4MB && p_Vid->Transform8x8Mode)
+ {
+ dP = &(currSlice->partArr[partMap[SE_HEADER]]);
+ TRACE_STRING("transform_size_8x8_flag");
+
+ // read CAVLC transform_size_8x8_flag
+ if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+ {
+ currMB->luma_transform_size_8x8_flag = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
+ }
+ else
+ {
+ currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &dP->de_cabac);
+ }
+
+
+ if (currMB->luma_transform_size_8x8_flag)
+ {
+ currMB->mb_type = I8MB; // 8x8 transform selected: I4MB becomes I8MB
+ memset(&currMB->b8mode, I8MB, 4 * sizeof(char));
+ memset(&currMB->b8pdir, -1, 4 * sizeof(char));
+ }
+ }
+ else
+ {
+ currMB->luma_transform_size_8x8_flag = FALSE;
+ }
+
+ if(p_Vid->active_pps->constrained_intra_pred_flag) // inter frame
+ {
+ if( !IS_INTRA(currMB) )
+ {
+ p_Vid->intra_block[mb_nr] = 0; // record that this MB is not intra coded
+ }
+ }
+
+ //--- init macroblock data ---
+ if (currMB->mb_type != P8x8) // P8x8 was already initialised above, before its motion info was read
+ init_macroblock(currMB);
+
+ if (IS_DIRECT (currMB) && p_Vid->cod_counter >= 0) // direct MB while a skip is in effect: nothing more to read
+ {
+ currMB->cbp = 0;
+ reset_coeffs(currSlice);
+
+ if (p_Vid->active_pps->entropy_coding_mode_flag ==CABAC)
+ p_Vid->cod_counter=-1; // undo the cod_counter=0 set on the CABAC skip path above
+ }
+ else if (IS_SKIP (currMB)) //keep last macroblock
+ {
+ skip_macroblock(currMB);
+ }
+ else if(currMB->mb_type != IPCM)
+ {
+ // intra prediction modes for a macroblock 4x4 **********************************************
+ if (IS_INTRA(currMB))
+ read_ipred_modes(currMB);
+
+ // read inter frame vector data *********************************************************
+ if (IS_INTERMV (currMB) && (currMB->mb_type != P8x8))
+ {
+ currSlice->read_motion_info_from_NAL (currMB);
+ }
+ // read CBP and Coeffs ***************************************************************
+ currSlice->read_CBP_and_coeffs_from_NAL (currMB);
+ }
+ else
+ {
+ //read pcm_alignment_zero_bit and pcm_byte[i]
+
+ // here dP is assigned with the same dP as SE_MBTYPE, because IPCM syntax is in the
+ // same category as MBTYPE
+ if ( currSlice->dp_mode && currSlice->dpB_NotPresent ) // data partitioning in use and partition B missing: conceal instead of reading
+ {
+ concealIPCMcoeffs(currMB);
+ }
+ else
+ {
+ dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
+ read_IPCM_coeffs_from_NAL(currSlice, dP);
+ }
+ }
+
+ return;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Initialize decoding engine after decoding an IPCM macroblock
+*    (for IPCM CABAC 28/11/2003)
+*
+*    Restarts the arithmetic decoder of every data partition of the
+*    slice (1 for PAR_DP_1, 3 for PAR_DP_3) at the partition's current
+*    read_len byte position. Any other partition mode prints a message
+*    and terminates the process with exit(1).
+*
+* \param currSlice
+*    slice whose partitions are re-initialised
+*
+* \author
+*    Dong Wang <Dong.Wang@bristol.ac.uk>
+************************************************************************
+*/
+static void init_decoding_engine_IPCM(Slice *currSlice)
+{
+  int partition_count;
+  int part;
+
+  if (currSlice->dp_mode != PAR_DP_1 && currSlice->dp_mode != PAR_DP_3)
+  {
+    printf("Partition Mode is not supported\n");
+    exit(1);
+  }
+
+  partition_count = (currSlice->dp_mode == PAR_DP_1) ? 1 : 3;
+
+  for (part = 0; part < partition_count; ++part)
+  {
+    DataPartition *partition = &currSlice->partArr[part];
+    Bitstream *stream = partition->bitstream;
+    const int start_byte = stream->read_len;
+
+    arideco_start_decoding (&partition->de_cabac, stream->streamBuffer, start_byte, &stream->read_len);
+  }
+}
+
+
+
+
+/*!
+************************************************************************
+* \brief
+*    Read IPCM pcm_alignment_zero_bit and pcm_byte[i] from stream to currSlice->ipcm
+*    (for IPCM CABAC and IPCM CAVLC)
+*
+* \param currSlice
+*    slice receiving the samples in currSlice->ipcm[plane][row][col]
+* \param dP
+*    data partition the samples are read from
+*
+* \author
+*    Dong Wang <Dong.Wang@bristol.ac.uk>
+************************************************************************
+*/
+
+static void read_IPCM_coeffs_from_NAL(Slice *currSlice, struct datapartition *dP)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  Bitstream *stream;
+  int plane, row, col;
+
+  // For CABAC no alignment bits are read here: the byte position is tracked
+  // separately, and the decoding engine is restarted afterwards
+  if (p_Vid->active_pps->entropy_coding_mode_flag == CABAC)
+  {
+    readIPCM_CABAC(currSlice, dP);
+    init_decoding_engine_IPCM(currSlice);
+    return;
+  }
+
+  stream = dP->bitstream;
+
+  // CAVLC: skip bits until the stream is byte aligned
+  if ((stream->frame_bitoffset & 0x07) != 0)
+  {
+    TRACE_STRING("pcm_alignment_zero_bit");
+    readSyntaxElement_FLC(stream, (8 - ((stream->frame_bitoffset) & 0x07)));
+  }
+
+  // luma samples, row by row
+  TRACE_STRING("pcm_sample_luma");
+  for (row = 0; row < MB_BLOCK_SIZE; ++row)
+  {
+    for (col = 0; col < MB_BLOCK_SIZE; ++col)
+    {
+      currSlice->ipcm[0][row][col] = readSyntaxElement_FLC(stream, p_Vid->bitdepth_luma);
+    }
+  }
+
+  // no chroma samples for YUV400 or when IS_INDEPENDENT(p_Vid) holds
+  if ((dec_picture->chroma_format_idc == YUV400) || IS_INDEPENDENT(p_Vid))
+    return;
+
+  // chroma samples: plane 1 (u) completely, then plane 2 (v)
+  for (plane = 1; plane <= 2; ++plane)
+  {
+    if (plane == 1)
+    {
+      TRACE_STRING("pcm_sample_chroma (u)");
+    }
+    else
+    {
+      TRACE_STRING("pcm_sample_chroma (v)");
+    }
+
+    for (row = 0; row < p_Vid->mb_cr_size_y; ++row)
+    {
+      for (col = 0; col < p_Vid->mb_cr_size_x; ++col)
+      {
+        currSlice->ipcm[plane][row][col] = readSyntaxElement_FLC(stream, p_Vid->bitdepth_chroma);
+      }
+    }
+  }
+}
+
+
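+/*!
+************************************************************************
+* \brief
+* If data partition B is lost, conceal PCM sample values with DC.
+*
+* \note
+* The routine this header describes is not defined at this point in the
+* file; the function that follows reads Intra 4x4 prediction modes.
+************************************************************************
+*/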
+
+
+/*!
+************************************************************************
+* \brief
+*    Read and decode the prediction modes of the four 4x4 blocks that
+*    make up 8x8 block b8 of the current macroblock
+*
+*    For each 4x4 block the coded mode is read (CAVLC or CABAC), the most
+*    probable mode is derived from the left and upper neighbours, and the
+*    decoded mode is stored in p_Vid->ipredmode.
+************************************************************************
+*/
+static void __forceinline read_ipred_iblock(VideoParameters *p_Vid, Macroblock *currMB, Slice *currSlice, DataPartition *dP, int b8)
+{
+  const int first_bx = (b8 & 1) << 1;
+  const int first_by = (b8 & 2);
+  SyntaxElement currSE;
+  int sub;
+
+  // the 4x4 blocks are visited in raster order: (0,0) (1,0) (0,1) (1,1)
+  for (sub = 0; sub < 4; ++sub)
+  {
+    const int bx = first_bx + (sub & 1);
+    const int by = first_by + (sub >> 1);
+    PixelPos left_block, top_block;
+    int left_is_si = 0, top_is_si = 0;
+    int coded_mode, up_mode, left_mode, most_probable;
+
+    // coded mode from the stream
+    if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+    {
+      readSyntaxElement_Intra4x4PredictionMode(&currSE, dP->bitstream);
+      coded_mode = currSE.value1;
+    }
+    else
+    {
+      coded_mode = readIntraPredMode_CABAC(currSlice, &dP->de_cabac);
+    }
+
+    p_Vid->getNeighbourXPLumaNB(currMB, (bx<<2) - 1, (by<<2), &left_block);
+    p_Vid->getNeighbourPXLumaNB(currMB, (bx<<2), (by<<2) - 1, &top_block );
+
+    // with constrained intra prediction a neighbour only counts when its
+    // macroblock is flagged in intra_block
+    if (p_Vid->active_pps->constrained_intra_pred_flag)
+    {
+      if (left_block.available)
+        left_block.available = p_Vid->intra_block[left_block.mb_addr];
+      if (top_block.available)
+        top_block.available = p_Vid->intra_block[top_block.mb_addr];
+    }
+
+    // !! KS: not sure if the following is still correct...
+    // Check to see if the neighboring block is SI
+    if (currMB->mb_type == I4MB && currSlice->slice_type == SI_SLICE) // need support for MBINTLC1
+    {
+      if (left_block.available && p_Vid->siblock [left_block.mb_addr / p_Vid->PicWidthInMbs][left_block.mb_addr % p_Vid->PicWidthInMbs])
+        left_is_si = 1;
+
+      if (top_block.available && p_Vid->siblock [top_block.mb_addr / p_Vid->PicWidthInMbs][top_block.mb_addr % p_Vid->PicWidthInMbs])
+        top_is_si = 1;
+    }
+
+    up_mode = -1;
+    if (top_block.available && !top_is_si)
+      up_mode = p_Vid->ipredmode[top_block.pos_y>>2][top_block.pos_x>>2];
+
+    left_mode = -1;
+    if (left_block.available && !left_is_si)
+      left_mode = p_Vid->ipredmode[left_block.pos_y>>2][left_block.pos_x>>2];
+
+    // DC when either neighbour is missing, otherwise the smaller mode
+    if (up_mode < 0 || left_mode < 0)
+      most_probable = DC_PRED;
+    else
+      most_probable = (up_mode < left_mode) ? up_mode : left_mode;
+
+    // -1 selects the most probable mode; any other value skips over it
+    if (coded_mode == -1)
+      p_Vid->ipredmode[currMB->block_y + by][currMB->block_x + bx] = most_probable;
+    else
+      p_Vid->ipredmode[currMB->block_y + by][currMB->block_x + bx] = coded_mode + (coded_mode >= most_probable);
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read and decode the single prediction mode of 8x8 block b8 and
+*    store it in all four 4x4 entries of p_Vid->ipredmode that the 8x8
+*    block covers
+************************************************************************
+*/
+static void __forceinline read_ipred_i8mb(VideoParameters *p_Vid, Macroblock *currMB, Slice *currSlice, DataPartition *dP, int b8)
+{
+  const int bx = (b8 & 1) << 1;
+  const int by = (b8 & 2);
+  const int bi = currMB->block_x + bx;
+  const int bj = currMB->block_y + by;
+  SyntaxElement currSE;
+  PixelPos left_block, top_block;
+  int left_is_si = 0, top_is_si = 0;
+  int coded_mode, up_mode, left_mode, most_probable, decoded;
+  int row, col;
+
+  // coded mode from the stream
+  if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+  {
+    readSyntaxElement_Intra4x4PredictionMode(&currSE, dP->bitstream);
+    coded_mode = currSE.value1;
+  }
+  else
+  {
+    coded_mode = readIntraPredMode_CABAC(currSlice, &dP->de_cabac);
+  }
+
+  p_Vid->getNeighbourXPLumaNB(currMB, (bx<<2) - 1, (by<<2), &left_block);
+  p_Vid->getNeighbourPXLumaNB(currMB, (bx<<2), (by<<2) - 1, &top_block );
+
+  // with constrained intra prediction a neighbour only counts when its
+  // macroblock is flagged in intra_block
+  if (p_Vid->active_pps->constrained_intra_pred_flag)
+  {
+    if (left_block.available)
+      left_block.available = p_Vid->intra_block[left_block.mb_addr];
+    if (top_block.available)
+      top_block.available = p_Vid->intra_block[top_block.mb_addr];
+  }
+
+  // !! KS: not sure if the following is still correct...
+  // Check to see if the neighboring block is SI
+  if (currMB->mb_type == I4MB && currSlice->slice_type == SI_SLICE) // need support for MBINTLC1
+  {
+    if (left_block.available && p_Vid->siblock [left_block.mb_addr / p_Vid->PicWidthInMbs][left_block.mb_addr % p_Vid->PicWidthInMbs])
+      left_is_si = 1;
+
+    if (top_block.available && p_Vid->siblock [top_block.mb_addr / p_Vid->PicWidthInMbs][top_block.mb_addr % p_Vid->PicWidthInMbs])
+      top_is_si = 1;
+  }
+
+  up_mode = -1;
+  if (top_block.available && !top_is_si)
+    up_mode = p_Vid->ipredmode[top_block.pos_y>>2][top_block.pos_x>>2];
+
+  left_mode = -1;
+  if (left_block.available && !left_is_si)
+    left_mode = p_Vid->ipredmode[left_block.pos_y>>2][left_block.pos_x>>2];
+
+  // DC when either neighbour is missing, otherwise the smaller mode
+  if (up_mode < 0 || left_mode < 0)
+    most_probable = DC_PRED;
+  else
+    most_probable = (up_mode < left_mode) ? up_mode : left_mode;
+
+  // -1 selects the most probable mode; any other value skips over it
+  if (coded_mode == -1)
+    decoded = most_probable;
+  else
+    decoded = coded_mode + (coded_mode >= most_probable);
+
+  // replicate over the 2x2 group of 4x4 entries
+  for (row = 0; row < 2; ++row)
+  {
+    for (col = 0; col < 2; ++col)
+    {
+      p_Vid->ipredmode[bj + row][bi + col] = decoded;
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the intra prediction modes of a macroblock: the luma modes of
+*    every 8x8 block coded as IBLOCK or I8MB, followed by the chroma
+*    prediction mode unless the picture is YUV400 or YUV444
+*
+* \note
+*    Calls error() with code 600 when the chroma mode lies outside
+*    DC_PRED_8 .. PLANE_8.
+************************************************************************
+*/
+static void read_ipred_modes(Macroblock *currMB)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  const byte *partMap = assignSE2partition[currSlice->dp_mode];
+  DataPartition *dP = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]);
+  char IntraChromaPredModeFlag = IS_INTRA(currMB);
+  SyntaxElement currSE;
+  int b8;
+
+  // luma: one pass over the four 8x8 blocks
+  for (b8 = 0; b8 < 4; ++b8)
+  {
+    if (currMB->b8mode[b8] == IBLOCK)
+    {
+      IntraChromaPredModeFlag = 1;
+      read_ipred_iblock(p_Vid, currMB, currSlice, dP, b8);
+    }
+    else if (currMB->b8mode[b8] == I8MB)
+    {
+      IntraChromaPredModeFlag = 1;
+      read_ipred_i8mb(p_Vid, currMB, currSlice, dP, b8);
+    }
+  }
+
+  // chroma mode is only present for intra macroblocks in 4:2:0 / 4:2:2
+  if (!IntraChromaPredModeFlag)
+    return;
+  if ((dec_picture->chroma_format_idc == YUV400) || (dec_picture->chroma_format_idc == YUV444))
+    return;
+
+  TRACE_STRING("intra_chroma_pred_mode");
+
+  if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+  {
+    currSE.mapping = linfo_ue;
+    readSyntaxElement_UVLC(&currSE, dP);
+    currMB->c_ipred_mode = (char) currSE.value1;
+  }
+  else
+  {
+    currMB->c_ipred_mode = readCIPredMode_CABAC(currMB, &dP->de_cabac);
+  }
+
+  if (currMB->c_ipred_mode < DC_PRED_8 || currMB->c_ipred_mode > PLANE_8)
+  {
+    error("illegal chroma intra pred mode!\n", 600);
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Get current block spatial neighbors
+*
+*    Fills block[0..3] with the left, up, upper-right and upper-left
+*    neighbours, converts their pos_x/pos_y to 4x4 block units and
+*    replaces an unavailable upper-right neighbour by the upper-left one.
+*
+* \note
+*    NOTE(review): the coordinates are shifted even for unavailable
+*    entries, unlike get_neighbors0016 - confirm that this is intended.
+************************************************************************
+*/
+void get_neighbors(Macroblock *currMB,       // <--  current Macroblock
+                   PixelPos   *block,        // <--> neighbor blocks
+                   int         mb_x,         // <--  block x position
+                   int         mb_y,         // <--  block y position
+                   int         blockshape_x  // <--  block width
+                   )
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos *nb;
+  int hide_upper_right = 0;
+
+  p_Vid->getNeighbourXPLumaNB(currMB, mb_x - 1,            mb_y,     &block[0]);   // left
+  p_Vid->getNeighbourPXLumaNB(currMB, mb_x,                mb_y - 1, &block[1]);   // up
+  p_Vid->getNeighbourPXLuma  (currMB, mb_x + blockshape_x, mb_y - 1, &block[2]);   // upper right
+  p_Vid->getNeighbourLuma    (currMB, mb_x - 1,            mb_y - 1, &block[3]);   // upper left
+
+  // pixel coordinates -> 4x4 block coordinates
+  for (nb = block; nb < block + 4; ++nb)
+  {
+    nb->pos_x >>= 2;
+    nb->pos_y >>= 2;
+  }
+
+  // below the first row, some partition positions force the upper-right
+  // neighbour to be treated as unavailable
+  if (mb_y > 0)
+  {
+    if (mb_x >= 8)
+      hide_upper_right = (mb_x + blockshape_x == MB_BLOCK_SIZE);
+    else if (mb_y == 8)            // first column of 8x8 blocks
+      hide_upper_right = (blockshape_x == MB_BLOCK_SIZE);
+    else
+      hide_upper_right = (mb_x + blockshape_x == 8);
+  }
+
+  if (hide_upper_right)
+    block[2].available = 0;
+
+  // fall back to the upper-left neighbour
+  if (!block[2].available)
+  {
+    block[2] = block[3];
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Get current block spatial neighbors;
+*    this version is for mb_x == 0, mb_y == 0 and blockshape_x == 16
+*
+*    Only available neighbours have their pos_x/pos_y converted to 4x4
+*    block units; an unavailable upper-right neighbour is replaced by
+*    the upper-left one.
+************************************************************************
+*/
+void get_neighbors0016(Macroblock *currMB,   // <--  current Macroblock
+                       PixelPos   *block     // <--> neighbor blocks
+                       )
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos *nb;
+
+  p_Vid->getNeighbourLeftLuma(currMB,         &block[0]);   // left
+  p_Vid->getNeighbourPXLumaNB(currMB,  0, -1, &block[1]);   // up
+  p_Vid->getNeighbourPXLuma  (currMB, 16, -1, &block[2]);   // upper right
+  p_Vid->getNeighbourLuma    (currMB, -1, -1, &block[3]);   // upper left
+
+  for (nb = block; nb < block + 4; ++nb)
+  {
+    if (!nb->available)
+      continue;
+    nb->pos_x >>= 2;
+    nb->pos_y >>= 2;
+  }
+
+  if (!block[2].available)
+  {
+    block[2] = block[3];
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read motion info
+*
+*    Reads the reference picture indices of both lists and the LIST_0
+*    motion vectors of the macroblock (CAVLC or CABAC), then stores the
+*    LIST_0 reference picture id of each 4x4 block for deblocking.
+************************************************************************
+*/
+static void read_motion_info_from_NAL_p_slice(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  PicMotionParams *motion = &dec_picture->motion;
+
+  const byte *partMap = assignSE2partition[currSlice->dp_mode];
+  DataPartition *ref_part = &(currSlice->partArr[partMap[SE_REFFRAME]]);
+  DataPartition *mvd_part = &(currSlice->partArr[partMap[SE_MVD]]);
+
+  const int partmode = (currMB->mb_type == P8x8) ? 4 : currMB->mb_type;
+  const int step_h0 = BLOCK_STEP [partmode][0];
+  const int step_v0 = BLOCK_STEP [partmode][1];
+
+  // reference indices are coded unless this is a P8x8 macroblock with allrefzero set
+  const int refs_coded = (currMB->mb_type != P8x8) || (!p_Vid->allrefzero);
+
+  h264_ref_t *pic_num;
+  int list_offset = 0;
+  int row, col;
+
+  // field macroblocks of an MBAFF frame use their own reference lists
+  if ((currSlice->mb_aff_frame_flag) && (currMB->mb_field))
+    list_offset = (currMB->mbAddrX & 0x01) ? 4 : 2;
+
+  if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+  {
+    SyntaxElement currSE;
+
+    //===== READ REFERENCE PICTURE INDICES =====
+    // For LIST_0, if multiple ref. pictures, read LIST_0 reference picture indices for the MB
+    prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l0_active, refs_coded);
+    readMBRefPictureIdx (&currSE, ref_part, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0);
+
+    // For LIST_1, if multiple ref. pictures, read LIST_1 reference picture indices for the MB
+    prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l1_active, refs_coded);
+    readMBRefPictureIdx (&currSE, ref_part, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0);
+
+    //===== READ MOTION VECTORS =====
+    currSE.mapping = linfo_se;
+    readMBMotionVectors (&currSE, mvd_part, currMB, LIST_0, step_h0, step_v0);
+  }
+  else
+  {
+    //===== READ REFERENCE PICTURE INDICES =====
+    // an index is only parsed when it is coded and the list has more than one entry
+    if (refs_coded && currSlice->num_ref_idx_l0_active > 1)
+      readMBRefPictureIdx_CABAC(ref_part, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0);
+    else
+      readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0);
+
+    if (refs_coded && currSlice->num_ref_idx_l1_active > 1)
+      readMBRefPictureIdx_CABAC(ref_part, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0);
+    else
+      readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0);
+
+    //===== READ MOTION VECTORS =====
+    readMBMotionVectors_CABAC(mvd_part, currMB, LIST_0, step_h0, step_v0);
+  }
+
+  // record reference picture Ids for deblocking decisions
+  pic_num = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset];
+  for (row = 0; row < 4; ++row)
+  {
+    PicMotion *ref = &motion->motion[LIST_0][currMB->block_y + row][currMB->block_x];
+
+    for (col = 0; col < 4; ++col)
+    {
+      ref[col].ref_pic_id = (ref[col].ref_idx >= 0)?pic_num[(short)ref[col].ref_idx]:UNDEFINED_REFERENCE;
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read motion info
+*
+*    For a P8x8 macroblock, the 8x8 blocks with b8mode == 0 first get
+*    their motion derived (spatial or temporal direct, selected by
+*    direct_spatial_mv_pred_flag). Afterwards the reference indices and
+*    motion vectors of both lists are read (CAVLC or CABAC) and the
+*    reference picture ids of every 4x4 block are recorded for deblocking.
+*
+* \note
+*    In temporal direct mode error() is called with code -1111 when no
+*    LIST_0 entry matches the colocated reference. This includes the case
+*    where the candidate loop does not run at all, which previously left
+*    mapped_idx at -1 and used it as an array index.
+************************************************************************
+*/
+static void read_motion_info_from_NAL_b_slice (Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+  int i,j,k;
+  int mb_nr = currMB->mbAddrX;
+  DataPartition *dP = NULL;
+  const byte *partMap = assignSE2partition[currSlice->dp_mode];
+  int partmode = ((currMB->mb_type == P8x8) ? 4 : currMB->mb_type);
+  int step_h0 = BLOCK_STEP [partmode][0];
+  int step_v0 = BLOCK_STEP [partmode][1];
+
+  int i0, j0, j6;
+
+  int j4, i4, ii;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  PicMotionParams *motion = &dec_picture->motion;
+  MotionParams *colocated;
+
+  int mv_scale = 0;
+
+  // field macroblocks of an MBAFF frame use their own reference lists
+  int list_offset = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field))? (mb_nr&0x01) ? 4 : 2 : 0;
+
+  // pick the colocated motion data matching the macroblock's frame/field mode
+  if ((currSlice->mb_aff_frame_flag) && (currMB->mb_field))
+  {
+    if(mb_nr&0x01)
+    {
+      colocated = &currSlice->p_colocated->bottom;
+    }
+    else
+    {
+      colocated = &currSlice->p_colocated->top;
+    }
+  }
+  else
+  {
+    colocated = &currSlice->p_colocated->frame;
+  }
+
+  if (currMB->mb_type == P8x8)
+  {
+    if (currSlice->direct_spatial_mv_pred_flag)
+    {
+      //===== SPATIAL DIRECT =====
+      char l0_rFrame, l1_rFrame;
+      short pmvl0[2]={0,0}, pmvl1[2]={0,0};
+
+      prepare_direct_params(currMB, dec_picture, pmvl0, pmvl1, &l0_rFrame, &l1_rFrame);
+
+      for (k = 0; k < 4; ++k)
+      {
+        if (currMB->b8mode[k] == 0)
+        {
+          i = currMB->block_x + 2 * (k & 0x01);
+          for(j = 2 * (k >> 1); j < 2 * (k >> 1)+2;++j)
+          {
+            j6 = currMB->block_y_aff + j;
+            j4 = currMB->block_y + j;
+            for(i4 = i; i4 < i + 2; ++i4)
+            {
+              if (l0_rFrame >= 0)
+              {
+                // zero motion when reference 0 is used, the colocated block is not
+                // moving and the first LIST_1 picture is not long term
+                if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+                {
+                  motion->motion[LIST_0][j4][i4].mv[0] = 0;
+                  motion->motion[LIST_0][j4][i4].mv[1] = 0;
+                  motion->motion[LIST_0][j4][i4].ref_idx = 0;
+                }
+                else
+                {
+                  motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0];
+                  motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1];
+                  motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame;
+                }
+              }
+              else
+              {
+                motion->motion[LIST_0][j4][i4].mv[0] = 0;
+                motion->motion[LIST_0][j4][i4].mv[1] = 0;
+                motion->motion[LIST_0][j4][i4].ref_idx = -1;
+              }
+
+              if (l1_rFrame >= 0)
+              {
+                if (l1_rFrame==0 && ((!colocated->moving_block[j6][i4])&& (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+                {
+                  motion->motion[LIST_1][j4][i4].mv[0] = 0;
+                  motion->motion[LIST_1][j4][i4].mv[1] = 0;
+                  motion->motion[LIST_1][j4][i4].ref_idx = 0;
+                }
+                else
+                {
+                  motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0];
+                  motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1];
+                  motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame;
+                }
+              }
+              else
+              {
+                motion->motion[LIST_1][j4][i4].mv[0] = 0;
+                motion->motion[LIST_1][j4][i4].mv[1] = 0;
+                motion->motion[LIST_1][j4][i4].ref_idx = -1;
+              }
+
+              // neither list has a reference: both use index 0 (vectors were zeroed above)
+              if (l0_rFrame <0 && l1_rFrame <0)
+              {
+                motion->motion[LIST_0][j4][i4].ref_idx = 0;
+                motion->motion[LIST_1][j4][i4].ref_idx = 0;
+              }
+            }
+          }
+        }
+      }
+    }
+    else
+    {
+      //===== TEMPORAL DIRECT =====
+      for (k = 0; k < 4; ++k) // Scan all blocks
+      {
+        if (currMB->b8mode[k] == 0)
+        {
+          for(j0 = 2 * (k >> 1); j0 < 2 * (k >> 1) + 2; j0 += step_v0)
+          {
+            for(i0 = currMB->block_x + 2*(k & 0x01); i0 < currMB->block_x + 2 * (k & 0x01)+2; i0 += step_h0)
+            {
+              // use the colocated LIST_0 motion, or LIST_1 when LIST_0 has no reference
+              int refList = colocated->motion[LIST_0 ][currMB->block_y_aff + j0][i0].ref_idx== -1 ? LIST_1 : LIST_0;
+              int ref_idx = colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_idx;
+              int mapped_idx = -1, iref;
+
+              if (ref_idx == -1)
+              {
+                // colocated block has no reference in either list: zero motion, index 0
+                for (j4 = currMB->block_y + j0; j4 < currMB->block_y + j0 + step_v0; ++j4)
+                {
+                  int h;
+                  for (h=0;h<step_h0;h++)
+                  {
+                    PicMotion *m0 = &motion->motion[LIST_0][j4][i0+h];
+                    PicMotion *m1 = &motion->motion[LIST_1][j4][i0+h];
+                    m0->ref_idx = 0;
+                    m1->ref_idx = 0;
+                    memset(&m0->mv, 0, sizeof(MotionVector));
+                    memset(&m1->mv, 0, sizeof(MotionVector));
+                  }
+                }
+              }
+              else
+              {
+                // search the LIST_0 entry that refers to the colocated block's reference picture
+                for (iref = 0; iref < imin(currSlice->num_ref_idx_l0_active, p_Vid->listXsize[LIST_0 + list_offset]); ++iref)
+                {
+                  int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field));
+
+                  if(p_Vid->structure==0 && curr_mb_field==0)
+                  {
+                    // If the current MB is a frame MB and the colocated is from a field picture,
+                    // then the colocated->ref_pic_id may have been generated from the wrong value of
+                    // frame_poc if it references it's complementary field, so test both POC values
+                    if(p_Vid->listX[0][iref]->top_poc * 2 == colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_pic_id
+                      || p_Vid->listX[0][iref]->bottom_poc * 2 == colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_pic_id)
+                    {
+                      mapped_idx=iref;
+                      break;
+                    }
+                    else //! invalid index. Default to zero even though this case should not happen
+                      mapped_idx=INVALIDINDEX;
+                    continue;
+                  }
+                  if (dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][iref]==colocated->motion[refList][currMB->block_y_aff + j0][i0].ref_pic_id)
+                  {
+                    mapped_idx=iref;
+                    break;
+                  }
+                  else //! invalid index. Default to zero even though this case should not happen
+                    mapped_idx=INVALIDINDEX;
+                }
+
+                // mapped_idx is still -1 when the loop above had no candidates at all;
+                // reject that as well, since it is used as an array index below
+                if (mapped_idx < 0 || INVALIDINDEX == mapped_idx)
+                {
+                  error("temporal direct error: colocated block has ref that is unavailable",-1111);
+                }
+
+                for (j = j0; j < j0 + step_v0; ++j)
+                {
+                  j4 = currMB->block_y + j;
+                  j6 = currMB->block_y_aff + j;
+
+                  for (i4 = i0; i4 < i0 + step_h0; ++i4)
+                  {
+                    mv_scale = currSlice->mvscale[LIST_0 + list_offset][mapped_idx];
+
+                    motion->motion[LIST_0][j4][i4].ref_idx = (char) mapped_idx;
+                    motion->motion[LIST_1][j4][i4].ref_idx = 0;
+
+                    if (mv_scale == 9999 || p_Vid->listX[LIST_0+list_offset][mapped_idx]->is_long_term)
+                    {
+                      // no scaling: copy the colocated vector to LIST_0, zero LIST_1
+                      for (ii=0; ii < 2; ++ii)
+                      {
+                        motion->motion[LIST_0][j4][i4].mv[ii] = colocated->motion[refList][j6][i4].mv[ii];
+                        motion->motion[LIST_1][j4][i4].mv[ii] = 0;
+                      }
+                    }
+                    else
+                    {
+                      // LIST_0 = scaled colocated vector, LIST_1 = LIST_0 - colocated vector
+                      for (ii=0; ii < 2; ++ii)
+                      {
+                        motion->motion[LIST_0][j4][i4].mv[ii] = (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[ii] + 128 ) >> 8);
+                        motion->motion[LIST_1][j4][i4].mv[ii] = (short) (motion->motion[LIST_0][j4][i4].mv[ii] - colocated->motion[refList][j6][i4].mv[ii]);
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (p_Vid->active_pps->entropy_coding_mode_flag == CAVLC)
+  {
+    SyntaxElement currSE;
+    //===== READ REFERENCE PICTURE INDICES =====
+    dP = &(currSlice->partArr[partMap[SE_REFFRAME]]);
+    // For LIST_0, if multiple ref. pictures, read LIST_0 reference picture indices for the MB ***********
+    prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l0_active, TRUE);
+    readMBRefPictureIdx (&currSE, dP, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0);
+
+    // For LIST_1, if multiple ref. pictures, read LIST_1 reference picture indices for the MB ***********
+    prepareListforRefIdx (currMB, &currSE, currSlice->num_ref_idx_l1_active, TRUE);
+    readMBRefPictureIdx (&currSE, dP, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0);
+
+    //===== READ MOTION VECTORS =====
+    dP = &(currSlice->partArr[partMap[SE_MVD]]);
+
+    currSE.mapping = linfo_se;
+    // LIST_0 Motion vectors
+    readMBMotionVectors (&currSE, dP, currMB, LIST_0, step_h0, step_v0);
+    // LIST_1 Motion vectors
+    readMBMotionVectors (&currSE, dP, currMB, LIST_1, step_h0, step_v0);
+  }
+  else
+  {
+    //===== READ REFERENCE PICTURE INDICES =====
+    dP = &(currSlice->partArr[partMap[SE_REFFRAME]]);
+    if (currSlice->num_ref_idx_l0_active>1)
+    {
+      // For LIST_0, if multiple ref. pictures, read LIST_0 reference picture indices for the MB ***********
+      readMBRefPictureIdx_CABAC(dP, currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0);
+    }
+    else
+    {
+      readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_0][currMB->block_y], LIST_0, step_v0, step_h0);
+    }
+
+    if (currSlice->num_ref_idx_l1_active > 1)
+    {
+      // For LIST_1, if multiple ref. pictures, read LIST_1 reference picture indices for the MB ***********
+      readMBRefPictureIdx_CABAC(dP, currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0);
+    }
+    else
+    {
+      readMBRefPictureIdx_CABAC_NoReference(currMB, &motion->motion[LIST_1][currMB->block_y], LIST_1, step_v0, step_h0);
+    }
+
+    //===== READ MOTION VECTORS =====
+    dP = &(currSlice->partArr[partMap[SE_MVD]]);
+
+    // LIST_0 Motion vectors
+    readMBMotionVectors_CABAC(dP, currMB, LIST_0, step_h0, step_v0);
+    // LIST_1 Motion vectors
+    readMBMotionVectors_CABAC(dP, currMB, LIST_1, step_h0, step_v0);
+  }
+
+  // record reference picture Ids for deblocking decisions
+  for (k = LIST_0; k <= LIST_1; ++k)
+  {
+    const h264_ref_t *rec_pic_num = dec_picture->ref_pic_num[p_Vid->current_slice_nr][k+list_offset];
+    PicMotion **list_motion = &motion->motion[k][currMB->block_y];
+    for(j4 = 0; j4 < 4 ;++j4)
+    {
+      PicMotion *m = &list_motion[j4][currMB->block_x];
+      m[0].ref_pic_id = (m[0].ref_idx>=0)?rec_pic_num[(short)m[0].ref_idx]:UNDEFINED_REFERENCE;
+      m[1].ref_pic_id = (m[1].ref_idx>=0)?rec_pic_num[(short)m[1].ref_idx]:UNDEFINED_REFERENCE;
+      m[2].ref_pic_id = (m[2].ref_idx>=0)?rec_pic_num[(short)m[2].ref_idx]:UNDEFINED_REFERENCE;
+      m[3].ref_pic_id = (m[3].ref_idx>=0)?rec_pic_num[(short)m[3].ref_idx]:UNDEFINED_REFERENCE;
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Get the Prediction from the Neighboring Blocks for Number of
+*    Nonzero Coefficients
+*
+*    Luma Blocks
+*
+*    This variant reads plane 1 of p_Vid->nz_coeff and locates the
+*    neighbours with getNeighbourLuma.
+*
+* \return
+*    sum of the counts of the available left and top neighbours,
+*    averaged with rounding when two neighbours were counted
+************************************************************************
+*/
+static int predict_nnz_cb(Macroblock *currMB, int i,int j)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos nb;
+  int total = 0;
+  int counted = 0;
+  int side;
+
+  for (side = 0; side < 2; ++side)
+  {
+    if (side == 0)
+      p_Vid->getNeighbourLuma(currMB, i - 1, j, &nb);   // left block
+    else
+      p_Vid->getNeighbourLuma(currMB, i, j - 1, &nb);   // top block
+
+    // intra MB with constrained intra prediction and three data partitions:
+    // a neighbour not flagged in intra_block is counted with zero coefficients
+    if (IS_INTRA(currMB) && nb.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3))
+    {
+      nb.available &= p_Vid->intra_block[nb.mb_addr];
+      if (!nb.available)
+        ++counted;
+    }
+
+    if (nb.available)
+    {
+      total += p_Vid->nz_coeff [nb.mb_addr ][1][nb.y>>2][nb.x>>2];
+      ++counted;
+    }
+  }
+
+  return (counted == 2) ? ((total + 1) >> 1) : total;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Predict the number of nonzero coefficients of a block from its left
+*    and top neighbours, reading plane 2 of p_Vid->nz_coeff
+*
+* \return
+*    sum of the counts of the available neighbours, averaged with
+*    rounding when two neighbours were counted
+************************************************************************
+*/
+static int predict_nnz_cr(Macroblock *currMB, int i,int j)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos nb;
+  int total = 0;
+  int counted = 0;
+  int side;
+
+  for (side = 0; side < 2; ++side)
+  {
+    if (side == 0)
+      p_Vid->getNeighbourLuma(currMB, i - 1, j, &nb);   // left block
+    else
+      p_Vid->getNeighbourLuma(currMB, i, j - 1, &nb);   // top block
+
+    // intra MB with constrained intra prediction and three data partitions:
+    // a neighbour not flagged in intra_block is counted with zero coefficients
+    if (IS_INTRA(currMB) && nb.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3))
+    {
+      nb.available &= p_Vid->intra_block[nb.mb_addr];
+      if (!nb.available)
+        ++counted;
+    }
+
+    if (nb.available)
+    {
+      total += p_Vid->nz_coeff [nb.mb_addr ][2][nb.y>>2][nb.x>>2];
+      ++counted;
+    }
+  }
+
+  return (counted == 2) ? ((total + 1) >> 1) : total;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Predict the number of nonzero coefficients of a luma block from its
+*    left and top neighbours (plane 0 of p_Vid->nz_coeff), without any
+*    constrained-intra handling
+*
+* \return
+*    sum of the counts of the available neighbours, averaged with
+*    rounding when both are available
+************************************************************************
+*/
+static int predict_nnz_luma(Macroblock *currMB, int i,int j)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos nb;
+  int total = 0;
+  int counted = 0;
+
+  // left block
+  p_Vid->getNeighbourXPLuma(currMB, i - 1, j, &nb);
+  if (nb.available)
+  {
+    total += p_Vid->nz_coeff [nb.mb_addr ][0][nb.y>>2][nb.x>>2];
+    ++counted;
+  }
+
+  // top block
+  p_Vid->getNeighbourPXLuma(currMB, i, j - 1, &nb);
+  if (nb.available)
+  {
+    total += p_Vid->nz_coeff [nb.mb_addr ][0][nb.y>>2][nb.x>>2];
+    ++counted;
+  }
+
+  return (counted == 2) ? ((total + 1) >> 1) : total;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Predict the number of nonzero coefficients of a luma block from its
+*    left and top neighbours (plane 0 of p_Vid->nz_coeff), applying the
+*    constrained-intra rule used with three data partitions
+*
+* \return
+*    sum of the counts of the available neighbours, averaged with
+*    rounding when two neighbours were counted
+************************************************************************
+*/
+static int predict_nnz_luma_intra(Macroblock *currMB, int i,int j)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos nb;
+  int total = 0;
+  int counted = 0;
+  int side;
+
+  for (side = 0; side < 2; ++side)
+  {
+    if (side == 0)
+      p_Vid->getNeighbourXPLuma(currMB, i - 1, j, &nb);   // left block
+    else
+      p_Vid->getNeighbourPXLuma(currMB, i, j - 1, &nb);   // top block
+
+    // a neighbour not flagged in intra_block is counted with zero coefficients
+    if (nb.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3))
+    {
+      nb.available &= p_Vid->intra_block[nb.mb_addr];
+      if (!nb.available)
+        ++counted;
+    }
+
+    if (nb.available)
+    {
+      total += p_Vid->nz_coeff [nb.mb_addr ][0][nb.y>>2][nb.x>>2];
+      ++counted;
+    }
+  }
+
+  return (counted == 2) ? ((total + 1) >> 1) : total;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Get the Prediction from the Neighboring Blocks for Number of
+*    Nonzero Coefficients
+*
+*    Chroma Blocks
+*
+*    Variant without constrained-intra handling.
+*
+* \return
+*    0 for YUV444 pictures; otherwise the sum of the counts of the
+*    available left and top neighbours, averaged with rounding when both
+*    are available
+************************************************************************
+*/
+static int predict_nnz_chroma_inter(Macroblock *currMB, int i,int j)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  PixelPos nb;
+  int total = 0;
+  int counted = 0;
+
+  if (dec_picture->chroma_format_idc == YUV444)
+    return total;
+
+  //YUV420 and YUV422
+  // left block
+  p_Vid->getNeighbour(currMB, ((i&0x01)<<2) - 1, j, p_Vid->mb_size[IS_CHROMA], &nb);
+  if (nb.available)
+  {
+    total += p_Vid->nz_coeff [nb.mb_addr ][1][nb.y>>2][2 * (i>>1) + (nb.x>>2)];
+    ++counted;
+  }
+
+  // top block
+  p_Vid->getNeighbour(currMB, ((i&0x01)<<2), j - 1, p_Vid->mb_size[IS_CHROMA], &nb);
+  if (nb.available)
+  {
+    total += p_Vid->nz_coeff [nb.mb_addr ][1][nb.y>>2][2 * (i>>1) + (nb.x>>2)];
+    ++counted;
+  }
+
+  return (counted == 2) ? ((total + 1) >> 1) : total;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Predict the number of nonzero coefficients of a chroma block from
+*    its left and top neighbours, applying the constrained-intra rule
+*    used with three data partitions
+*
+* \return
+*    0 for YUV444 pictures; otherwise the sum of the counts of the
+*    available neighbours, averaged with rounding when two neighbours
+*    were counted
+************************************************************************
+*/
+static int predict_nnz_chroma_intra(Macroblock *currMB, int i,int j)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  PixelPos nb;
+  int total = 0;
+  int counted = 0;
+  int side;
+
+  if (dec_picture->chroma_format_idc == YUV444)
+    return total;
+
+  //YUV420 and YUV422
+  for (side = 0; side < 2; ++side)
+  {
+    if (side == 0)
+      p_Vid->getNeighbour(currMB, ((i&0x01)<<2) - 1, j, p_Vid->mb_size[IS_CHROMA], &nb);   // left block
+    else
+      p_Vid->getNeighbour(currMB, ((i&0x01)<<2), j - 1, p_Vid->mb_size[IS_CHROMA], &nb);   // top block
+
+    // a neighbour not flagged in intra_block is counted with zero coefficients
+    if (nb.available && p_Vid->active_pps->constrained_intra_pred_flag && (p_Vid->currentSlice->dp_mode==PAR_DP_3))
+    {
+      nb.available &= p_Vid->intra_block[nb.mb_addr];
+      if (!nb.available)
+        ++counted;
+    }
+
+    if (nb.available)
+    {
+      total += p_Vid->nz_coeff [nb.mb_addr ][1][nb.y>>2][2 * (i>>1) + (nb.x>>2)];
+      ++counted;
+    }
+  }
+
+  return (counted == 2) ? ((total + 1) >> 1) : total;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Reads coeff of an 4x4 block (CAVLC)
+*
+*    Parses, in this order: coefficient count and trailing ones, the
+*    trailing-one signs, the remaining levels, the total number of zeros
+*    and the run before each coefficient.
+*
+* \param currMB
+*    current macroblock
+* \param i, j
+*    column / row of the 4x4 block inside p_Vid->nz_coeff
+* \param levarr
+*    receives the levels (cleared first)
+* \param runarr
+*    receives the runs (cleared first)
+* \param number_coefficients
+*    receives the number of coefficients that were read
+*
+* \author
+*    Karl Lillevold <karll@real.com>
+*    contributions by James Au <james@ubvideo.com>
+************************************************************************
+*/
+static void readCoeff4x4_CAVLC_Luma (Macroblock *currMB,
+                                     int i, int j, int levarr[16], int runarr[16],
+                                     int *number_coefficients)
+{
+  static const int incVlc[] = {0,3,6,12,24,48,32768};    // maximum vlc = 6
+
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const byte *partMap = assignSE2partition[currSlice->dp_mode];
+  int mb_nr = currMB->mbAddrX;
+  SyntaxElement currSE;
+  DataPartition *dP;
+  Bitstream *currStream;
+
+  int numcoeff, numtrailingones, numcoeff_vlc;
+  int nnz, k, vlcnum;
+  int first_level, level_two_or_higher;
+  int zerosleft, pos;
+
+  p_Vid->nz_coeff[mb_nr][0][j][i] = 0;
+
+  // partition and neighbour prediction depend on the macroblock being intra
+  if (IS_INTRA (currMB))
+  {
+    dP = &(currSlice->partArr[partMap[SE_LUM_AC_INTRA]]);
+    nnz = predict_nnz_luma_intra(currMB, i<<2, j<<2);
+  }
+  else
+  {
+    dP = &(currSlice->partArr[partMap[SE_LUM_AC_INTER]]);
+    nnz = predict_nnz_luma(currMB, i<<2, j<<2);
+  }
+
+  // table selection from the predicted count
+  numcoeff_vlc = (nnz < 2) ? 0 : (nnz < 4) ? 1 : (nnz < 8) ? 2 : 3;
+
+  currStream = dP->bitstream;
+  readSyntaxElement_NumCoeffTrailingOnes(&currSE, currStream, numcoeff_vlc);
+
+  numcoeff = currSE.value1;
+  numtrailingones = currSE.value2;
+
+  p_Vid->nz_coeff[mb_nr][0][j][i] = (byte) numcoeff;
+
+  memzero64(levarr);
+  memzero64(runarr);
+
+  *number_coefficients = numcoeff;
+
+  if (!numcoeff)
+    return;
+
+  // trailing ones: one sign bit each, bit k belongs to levarr[k+numcoeff-numtrailingones]
+  if (numtrailingones)
+  {
+    int code = readSyntaxElement_FLC(currStream, numtrailingones);
+
+    for (k = 0; k < numtrailingones; k++)
+    {
+#ifdef _M_IX86
+      levarr[k+numcoeff-numtrailingones] = ((_bittest((const long *)&code, k)<<1) ^ 0xFFFFFFFF) + 2;
+#else
+      levarr[k+numcoeff-numtrailingones] = (code>>k)&1 ? -1:1;
+#endif
+    }
+  }
+
+  // decode levels
+  first_level = numcoeff - 1 - numtrailingones;
+  level_two_or_higher = (numcoeff > 3 && numtrailingones == 3)? 0 : 1;
+  vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 1 : 0;
+
+  for (k = first_level; k >= 0; k--)
+  {
+    int level, abslevel;
+
+    if (vlcnum == 0)
+      level = readSyntaxElement_Level_VLC0(currStream);
+    else
+      level = readSyntaxElement_Level_VLCN(vlcnum, currStream);
+
+    // only the first decoded level can get its magnitude raised by one
+    if (level_two_or_higher)
+    {
+      level += (level > 0) ? 1 : -1;
+      level_two_or_higher = 0;
+    }
+
+    levarr[k] = level;
+    abslevel = iabs(level);
+
+    // update VLC table
+    if (abslevel > incVlc[vlcnum])
+      ++vlcnum;
+
+    if (k == first_level && abslevel > 3)
+      vlcnum = 2;
+  }
+
+  // decode total run (a full block of 16 coefficients has no zeros)
+  if (numcoeff < 16)
+    zerosleft = readSyntaxElement_TotalZeros(currStream, numcoeff - 1);
+  else
+    zerosleft = 0;
+
+  // decode run before each coefficient, from the last one downwards
+  pos = numcoeff - 1;
+
+  if (zerosleft > 0 && pos > 0)
+  {
+    do
+    {
+      // select VLC for runbefore
+      runarr[pos] = readSyntaxElement_Run(currStream, imin(zerosleft - 1, RUNBEFORE_NUM_M1));
+
+      zerosleft -= runarr[pos];
+      --pos;
+    } while (zerosleft != 0 && pos != 0);
+  }
+
+  // whatever is left belongs to the coefficient at the stopping position
+  runarr[pos] = zerosleft;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Read the levels and runs of one chroma AC 4x4 block (CAVLC mode).
+*
+* \param currMB
+*    macroblock being decoded
+* \param i, j
+*    block coordinates; they index nz_coeff and feed the nnz predictor
+* \param levarr
+*    out: decoded levels (cleared with memzero64 before decoding)
+* \param runarr
+*    out: decoded run-before values (cleared with memzero64 before decoding)
+* \param number_coefficients
+*    out: coefficient count signalled for this block
+************************************************************************
+*/
+static void readCoeff4x4_CAVLC_ChromaAC(Macroblock *currMB,
+                                        int i, int j, int levarr[16], int runarr[16],
+                                        int *number_coefficients)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int mb_nr = currMB->mbAddrX;
+  SyntaxElement currSE;
+  DataPartition *dP;
+  const byte *partMap = assignSE2partition[currSlice->dp_mode];
+  Bitstream *currStream;
+
+  int k, code, vlcnum;
+  int numcoeff = 0, numtrailingones, numcoeff_vlc;
+  int level_two_or_higher;
+  int totzeros, abslevel;
+  int zerosleft, ntr;
+  int nnz;
+  static const int incVlc[] = {0,3,6,12,24,48,32768}; // maximum vlc = 6
+
+  // this is the chroma AC reader; the label used to read "ChrDC" (copy/paste)
+  TRACE_PRINTF("ChrAC");
+  p_Vid->nz_coeff[mb_nr][0][j][i] = 0;
+
+  if (IS_INTRA (currMB))
+  {
+    dP = &(currSlice->partArr[partMap[SE_CHR_AC_INTRA]]);
+    nnz = predict_nnz_chroma_intra(currMB, i, ((j-4)<<2));
+  }
+  else
+  {
+    dP = &(currSlice->partArr[partMap[SE_CHR_AC_INTER]]);
+    nnz = predict_nnz_chroma_inter(currMB, i, ((j-4)<<2));
+  }
+  currStream = dP->bitstream;
+
+  // the predicted number of non-zero coefficients selects the coeff_token table
+  if (nnz < 2)
+    numcoeff_vlc = 0;
+  else if (nnz < 4)
+    numcoeff_vlc = 1;
+  else if (nnz < 8)
+    numcoeff_vlc = 2;
+  else
+    numcoeff_vlc = 3;
+
+  readSyntaxElement_NumCoeffTrailingOnes(&currSE, currStream, numcoeff_vlc);
+
+  numcoeff = currSE.value1;
+  numtrailingones = currSE.value2;
+
+  p_Vid->nz_coeff[mb_nr][0][j][i] = (byte) numcoeff;
+
+  memzero64(levarr);
+  memzero64(runarr);
+
+  *number_coefficients = numcoeff;
+
+  if (numcoeff)
+  {
+    if (numtrailingones)
+    {
+      // one sign bit per trailing one, highest index first
+      code = readSyntaxElement_FLC (currStream, numtrailingones);
+
+      ntr = numtrailingones;
+      for (k = numcoeff - 1; k > numcoeff - 1 - numtrailingones; k--)
+      {
+        ntr --;
+        levarr[k] = (code>>ntr)&1 ? -1 : 1;
+      }
+    }
+
+    // decode levels
+    level_two_or_higher = (numcoeff > 3 && numtrailingones == 3)? 0 : 1;
+    vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 1 : 0;
+
+    for (k = numcoeff - 1 - numtrailingones; k >= 0; k--)
+    {
+      // declared ahead of the trace statement so the block stays valid
+      // when TRACE is enabled on compilers without mixed declarations
+      int level;
+
+#if TRACE
+      // the block label is passed literally; no 'type' buffer is declared here
+      snprintf(currSE.tracestring,
+        TRACESTRING_SIZE, "%s lev (%d,%d) k=%d vlc=%d ", "ChrAC", i, j, k, vlcnum);
+#endif
+
+      if (vlcnum == 0)
+        level=readSyntaxElement_Level_VLC0(currStream);
+      else
+        level=readSyntaxElement_Level_VLCN(vlcnum, currStream);
+
+      // only the first decoded level gets its magnitude bumped by one
+      if (level_two_or_higher)
+      {
+        level += (level > 0) ? 1 : -1;
+        level_two_or_higher = 0;
+      }
+
+      levarr[k] = level;
+      abslevel = iabs(levarr[k]);
+
+      // update VLC table
+      if (abslevel > incVlc[vlcnum])
+        ++vlcnum;
+
+      if (k == numcoeff - 1 - numtrailingones && abslevel >3)
+        vlcnum = 2;
+    }
+
+    if (numcoeff < 15)
+    {
+      // decode total run
+      vlcnum = numcoeff - 1;
+      totzeros = readSyntaxElement_TotalZeros(currStream, vlcnum);
+    }
+    else
+    {
+      totzeros = 0;
+    }
+
+    // decode run before each coefficient
+    zerosleft = totzeros;
+    i = numcoeff - 1;
+
+    if (zerosleft > 0 && i > 0)
+    {
+      do
+      {
+        // select VLC for runbefore
+        vlcnum = imin(zerosleft - 1, RUNBEFORE_NUM_M1);
+
+        runarr[i] = readSyntaxElement_Run(currStream, vlcnum);
+
+        zerosleft -= runarr[i];
+        i --;
+      } while (zerosleft != 0 && i != 0);
+    }
+    // whatever is left over is assigned to the last index reached
+    runarr[i] = zerosleft;
+  } // if numcoeff
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the levels and runs of one chroma DC block (CAVLC mode).
+*    The coefficient limit is taken from p_Vid->num_cdc_coeff.
+*    levarr/runarr are cleared and then filled; the coefficient count
+*    is returned through number_coefficients.
+************************************************************************
+*/
+static void readCoeff4x4_CAVLC_ChromaDC(Macroblock *currMB, int i, int j, int levarr[16], int runarr[16], int *number_coefficients)
+{
+  // thresholds above which the level VLC table index is advanced
+  static const int vlc_threshold[] = {0,3,6,12,24,48,32768};
+
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int mb_addr = currMB->mbAddrX;
+  const int coeff_limit = p_Vid->num_cdc_coeff;
+  SyntaxElement currSE;
+  Bitstream *currStream;
+  int se_type;
+  int total, trailing, first_level_idx;
+  int table, bump_first, idx;
+  int zeros;
+
+  TRACE_PRINTF("ChrDC");
+  p_Vid->nz_coeff[mb_addr][0][j][i] = 0;
+
+  // intra and inter macroblocks may map to different data partitions
+  se_type = IS_INTRA (currMB) ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER;
+  currStream = currSlice->partArr[assignSE2partition[currSlice->dp_mode][se_type]].bitstream;
+
+  readSyntaxElement_NumCoeffTrailingOnesChromaDC(p_Vid, &currSE, currStream);
+  total = currSE.value1;
+  trailing = currSE.value2;
+
+  memzero64(levarr);
+  memzero64(runarr);
+  *number_coefficients = total;
+
+  if (total == 0)
+    return;
+
+  // trailing ones: one sign bit each, highest index first
+  if (trailing != 0)
+  {
+    const int sign_bits = readSyntaxElement_FLC (currStream, trailing);
+    int bit = trailing;
+
+    for (idx = total - 1; idx > total - 1 - trailing; --idx)
+    {
+      --bit;
+      levarr[idx] = ((sign_bits >> bit) & 1) ? -1 : 1;
+    }
+  }
+
+  // remaining levels, decoded from the highest remaining index down to 0
+  first_level_idx = total - 1 - trailing;
+  bump_first = (total > 3 && trailing == 3) ? 0 : 1;
+  table = (total > 10 && trailing < 3) ? 1 : 0;
+
+  for (idx = first_level_idx; idx >= 0; --idx)
+  {
+    int magnitude;
+    int value;
+
+    if (table == 0)
+      value = readSyntaxElement_Level_VLC0(currStream);
+    else
+      value = readSyntaxElement_Level_VLCN(table, currStream);
+
+    // only the first decoded level gets its magnitude bumped by one
+    if (bump_first)
+    {
+      value += (value > 0) ? 1 : -1;
+      bump_first = 0;
+    }
+
+    levarr[idx] = value;
+    magnitude = iabs(value);
+
+    // advance the VLC table once the magnitude exceeds the current threshold
+    if (magnitude > vlc_threshold[table])
+      ++table;
+
+    if (idx == first_level_idx && magnitude > 3)
+      table = 2;
+  }
+
+  // total_zeros is only transmitted when the block is not full
+  if (total < coeff_limit)
+    zeros = readSyntaxElement_TotalZerosChromaDC(p_Vid, currStream, total - 1);
+  else
+    zeros = 0;
+
+  // run before each coefficient
+  idx = total - 1;
+  if (zeros > 0 && idx > 0)
+  {
+    do
+    {
+      const int run_table = imin(zeros - 1, RUNBEFORE_NUM_M1);
+
+      runarr[idx] = readSyntaxElement_Run(currStream, run_table);
+      zeros -= runarr[idx];
+      --idx;
+    } while (zeros != 0 && idx != 0);
+  }
+  // whatever is left over is assigned to the last index reached
+  runarr[idx] = zeros;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the levels and runs of one 4x4 block (CAVLC mode).
+*    LUMA, CHROMA_DC and CHROMA_AC are forwarded to their dedicated
+*    readers; the Intra16x16 luma types and all CB / CR types are
+*    decoded here.
+*
+* \param currMB
+*    macroblock being decoded
+* \param block_type
+*    block type selector (LUMA, CB, CR, *_INTRA16x16DC/AC, CHROMA_DC/AC)
+* \param i, j
+*    block coordinates; they index nz_coeff and feed the nnz predictor
+* \param levarr
+*    out: decoded levels (cleared with memzero64 before decoding)
+* \param runarr
+*    out: decoded run-before values (cleared with memzero64 before decoding)
+* \param number_coefficients
+*    out: coefficient count signalled for this block (0 for an
+*    invalid block type, should error() return)
+************************************************************************
+*/
+static void readCoeff4x4_CAVLC(Macroblock *currMB, int block_type, int i, int j, int levarr[16], int runarr[16], int *number_coefficients)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int mb_nr = currMB->mbAddrX;
+  SyntaxElement currSE;
+  DataPartition *dP;
+  Bitstream *currStream;
+
+  int k, code, vlcnum;
+  int numcoeff = 0, numtrailingones, numcoeff_vlc;
+  int level_two_or_higher;
+  int totzeros, abslevel;
+  int zerosleft, ntr, dptype = 0;
+  int max_coeff_num, nnz;
+  int plane; // first index into nz_coeff[mb_nr][plane]: 0, 1 (CB types) or 2 (CR types)
+  static const int incVlc[] = {0,3,6,12,24,48,32768}; // maximum vlc = 6
+
+  switch (block_type)
+  {
+  case LUMA:
+    readCoeff4x4_CAVLC_Luma(currMB, i, j, levarr, runarr, number_coefficients);
+    return;
+  case LUMA_INTRA16x16DC:
+    max_coeff_num = 16;
+    TRACE_PRINTF("Lum16DC");
+    dptype = SE_LUM_DC_INTRA;
+    plane = 0;
+    break;
+  case LUMA_INTRA16x16AC:
+    max_coeff_num = 15;
+    TRACE_PRINTF("Lum16AC");
+    dptype = SE_LUM_AC_INTRA;
+    plane = 0;
+    break;
+  case CB:
+    max_coeff_num = 16;
+    TRACE_PRINTF("Luma_add1");
+    dptype = (IS_INTRA (currMB)) ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER;
+    plane = 1;
+    break;
+  case CB_INTRA16x16DC:
+    max_coeff_num = 16;
+    TRACE_PRINTF("Luma_add1_16DC");
+    dptype = SE_LUM_DC_INTRA;
+    plane = 1;
+    break;
+  case CB_INTRA16x16AC:
+    max_coeff_num = 15;
+    TRACE_PRINTF("Luma_add1_16AC");
+    dptype = SE_LUM_AC_INTRA;
+    plane = 1;
+    break;
+  case CR:
+    max_coeff_num = 16;
+    TRACE_PRINTF("Luma_add2");
+    dptype = (IS_INTRA (currMB)) ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER;
+    plane = 2;
+    break;
+  case CR_INTRA16x16DC:
+    max_coeff_num = 16;
+    TRACE_PRINTF("Luma_add2_16DC");
+    dptype = SE_LUM_DC_INTRA;
+    plane = 2;
+    break;
+  case CR_INTRA16x16AC:
+    max_coeff_num = 15;
+    // CR plane: label used to read "Luma_add1_16AC" (copy/paste from the CB case)
+    TRACE_PRINTF("Luma_add2_16AC");
+    dptype = SE_LUM_AC_INTRA;
+    plane = 2;
+    break;
+  case CHROMA_DC:
+    readCoeff4x4_CAVLC_ChromaDC(currMB, i, j, levarr, runarr, number_coefficients);
+    return;
+  case CHROMA_AC:
+    readCoeff4x4_CAVLC_ChromaAC(currMB, i, j, levarr, runarr, number_coefficients);
+    return;
+  default:
+    error ("readCoeff4x4_CAVLC: invalid block type", 600);
+    // Should error() return, report an empty block instead of decoding
+    // with an uninitialised max_coeff_num.
+    p_Vid->nz_coeff[mb_nr][0][j][i] = 0;
+    memzero64(levarr);
+    memzero64(runarr);
+    *number_coefficients = 0;
+    return;
+  }
+
+  p_Vid->nz_coeff[mb_nr][plane][j][i] = 0;
+
+  dP = &(currSlice->partArr[assignSE2partition[currSlice->dp_mode][dptype]]);
+  currStream = dP->bitstream;
+
+  // predict the number of non-zero coefficients with the predictor of the plane
+  if (plane == 0)
+    nnz = predict_nnz_luma_intra(currMB, i<<2, j<<2);
+  else if (plane == 1)
+    nnz = predict_nnz_cb(currMB, i<<2, j<<2);
+  else
+    nnz = predict_nnz_cr(currMB, i<<2, j<<2);
+
+  // the prediction selects the coeff_token table
+  if (nnz < 2)
+    numcoeff_vlc = 0;
+  else if (nnz < 4)
+    numcoeff_vlc = 1;
+  else if (nnz < 8)
+    numcoeff_vlc = 2;
+  else
+    numcoeff_vlc = 3;
+
+  readSyntaxElement_NumCoeffTrailingOnes(&currSE, currStream, numcoeff_vlc);
+
+  numcoeff = currSE.value1;
+  numtrailingones = currSE.value2;
+
+  p_Vid->nz_coeff[mb_nr][plane][j][i] = (byte) numcoeff;
+
+  memzero64(levarr);
+  memzero64(runarr);
+
+  *number_coefficients = numcoeff;
+
+  if (numcoeff)
+  {
+    if (numtrailingones)
+    {
+      // one sign bit per trailing one, highest index first
+      code = readSyntaxElement_FLC(currStream, numtrailingones);
+
+      ntr = numtrailingones;
+      for (k = numcoeff - 1; k > numcoeff - 1 - numtrailingones; k--)
+      {
+        ntr --;
+        levarr[k] = (code>>ntr)&1 ? -1 : 1;
+      }
+    }
+
+    // decode levels
+    level_two_or_higher = (numcoeff > 3 && numtrailingones == 3)? 0 : 1;
+    vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 1 : 0;
+
+    for (k = numcoeff - 1 - numtrailingones; k >= 0; k--)
+    {
+      int level;
+      if (vlcnum == 0)
+        level=readSyntaxElement_Level_VLC0(currStream);
+      else
+        level=readSyntaxElement_Level_VLCN(vlcnum, currStream);
+
+      // only the first decoded level gets its magnitude bumped by one
+      if (level_two_or_higher)
+      {
+        level += (level > 0) ? 1 : -1;
+        level_two_or_higher = 0;
+      }
+
+      levarr[k] = level;
+      abslevel = iabs(levarr[k]);
+
+      // update VLC table
+      if (abslevel > incVlc[vlcnum])
+        ++vlcnum;
+
+      if (k == numcoeff - 1 - numtrailingones && abslevel >3)
+        vlcnum = 2;
+    }
+
+    if (numcoeff < max_coeff_num)
+    {
+      // decode total run
+      vlcnum = numcoeff - 1;
+      totzeros = readSyntaxElement_TotalZeros(currStream, vlcnum);
+    }
+    else
+    {
+      totzeros = 0;
+    }
+
+    // decode run before each coefficient
+    zerosleft = totzeros;
+    i = numcoeff - 1;
+
+    if (zerosleft > 0 && i > 0)
+    {
+      do
+      {
+        // select VLC for runbefore
+        vlcnum = imin(zerosleft - 1, RUNBEFORE_NUM_M1);
+
+        runarr[i] = readSyntaxElement_Run(currStream, vlcnum);
+
+        zerosleft -= runarr[i];
+        i --;
+      } while (zerosleft != 0 && i != 0);
+    }
+    // whatever is left over is assigned to the last index reached
+    runarr[i] = zerosleft;
+  } // if numcoeff
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the run/level pairs of the 4x4 blocks of one sub-macroblock
+*    of an Intra16x16 MB from the NAL (CABAC Mode).
+*    Scanning starts at scan position 1; position 0 is not written here.
+*    The cbp_blk argument is accepted but not used.
+************************************************************************
+*/
+static void readCompCoeff4x4SMB_I16MB_CABAC(Macroblock *currMB, int context, h264_short_block_t *blocks, int block_y, int block_x, int64 *cbp_blk)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+  // frame macroblocks use the single (zig-zag) scan, everything else the field scan
+  const byte *scan_table = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_1D : FIELD_SCAN_1D;
+  // make distinction between INTRA and INTER coded luminance coefficients
+  const int se_type = (currMB->is_intra_block ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER);
+  DecodingEnvironment *engine = &currSlice->partArr[assignSE2partition[currSlice->dp_mode][se_type]].de_cabac;
+  int dy, dx;
+
+  for (dy = 0; dy < BLOCK_SIZE_8x8; dy += BLOCK_SIZE)
+  {
+    currMB->subblock_y = block_y + dy; // position for coeff_count ctx
+
+    for (dx = 0; dx < BLOCK_SIZE_8x8; dx += BLOCK_SIZE)
+    {
+      int16_t *coeffs = (int16_t *)(*blocks++);
+      const byte *scan = &scan_table[1];
+      int reads;
+
+      currMB->subblock_x = block_x + dx; // position for coeff_count ctx
+
+      // at most 16 reads; a zero level ends the block
+      for (reads = 0; reads < 16; ++reads)
+      {
+        RunLevel rl = readRunLevel_CABAC(currMB, engine, context);
+
+        if (rl.level == 0)
+          break;
+
+        scan += rl.run;
+        coeffs[*scan++] = rl.level;
+      }
+    }
+  }
+}
+
+/*
+ * Read run/level pairs with the given decoding engine and store the levels
+ * along 'scan' until a zero level is returned (at most 16 reads).
+ */
+static void read_ac_run_levels_4x4(Macroblock *currMB, DecodingEnvironment *de_cabac, int context, const byte *scan, int16_t *coeffs)
+{
+  int reads;
+
+  for (reads = 0; reads < 16; ++reads)
+  {
+    RunLevel rl = readRunLevel_CABAC(currMB, de_cabac, context);
+
+    if (rl.level == 0) /* leave if level == 0 */
+      break;
+
+    scan += rl.run;
+    coeffs[*scan++] = rl.level;
+  }
+}
+
+/*
+ * Read the four 4x4 blocks of one sub-macroblock (CABAC mode).
+ * For each block the first run/level pair is read with the DC engine; only
+ * when its level is non-zero is the block's bit set in *cbp_blk and the
+ * remaining pairs read with the AC engine.
+ * With _M_IX86 the 64-bit cbp word is updated through a 32-bit pointer.
+ */
+#ifdef _M_IX86
+static void readCompCoeff4x4SMB_CABAC(Macroblock *currMB, int context, h264_short_block_t *blocks, int block_y, int block_x, int64_t *cbp_blk64)
+#else
+static void readCompCoeff4x4SMB_CABAC(Macroblock *currMB, int context, h264_short_block_t *blocks, int block_y, int block_x, int64_t *cbp_blk)
+#endif
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  Slice *currSlice = currMB->p_Slice;
+  const byte *partMap = assignSE2partition[currSlice->dp_mode];
+  const byte *scan_table = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_1D : FIELD_SCAN_1D;
+  // bit position of the first block of this sub-macroblock inside the cbp word
+  const int bit_pos = block_y + (block_x >> 2);
+  const byte *scan;
+  int16_t *coeffs;
+  RunLevel rl;
+  DecodingEnvironment *dc_engine, *ac_engine;
+#ifdef _M_IX86
+  int32_t *cbp_blk = (int32_t *)cbp_blk64;
+#endif
+
+  /*
+   * make distinction between INTRA and INTER coded
+   * luminance coefficients
+   */
+  if (currMB->is_intra_block)
+  {
+    dc_engine = &currSlice->partArr[partMap[SE_LUM_DC_INTRA]].de_cabac;
+    ac_engine = &currSlice->partArr[partMap[SE_LUM_AC_INTRA]].de_cabac;
+  }
+  else
+  {
+    dc_engine = &currSlice->partArr[partMap[SE_LUM_DC_INTER]].de_cabac;
+    ac_engine = &currSlice->partArr[partMap[SE_LUM_AC_INTER]].de_cabac;
+  }
+
+  /* ---- first block ---- */
+  coeffs = (int16_t *)(*blocks++);
+  currMB->subblock_y = block_y; // position for coeff_count ctx
+  currMB->subblock_x = block_x; // position for coeff_count ctx
+  scan = scan_table;
+  rl = readRunLevel_CABAC(currMB, dc_engine, context);
+  if (rl.level != 0)
+  {
+    scan += rl.run;
+    *cbp_blk |= 1 << bit_pos;
+    coeffs[*scan++] = rl.level;
+    read_ac_run_levels_4x4(currMB, ac_engine, context, scan, coeffs);
+  }
+
+  /* ---- second block: x advanced by 4 ---- */
+  coeffs = (int16_t *)(*blocks++);
+  currMB->subblock_x += 4; // position for coeff_count ctx
+  scan = scan_table;
+  rl = readRunLevel_CABAC(currMB, dc_engine, context);
+  if (rl.level != 0)
+  {
+    scan += rl.run;
+    *cbp_blk |= 2 << bit_pos;
+    coeffs[*scan++] = rl.level;
+    read_ac_run_levels_4x4(currMB, ac_engine, context, scan, coeffs);
+  }
+
+  /* ---- third block: y advanced by 4, x reset ---- */
+  coeffs = (int16_t *)(*blocks++);
+  currMB->subblock_y += 4; // position for coeff_count ctx
+  currMB->subblock_x = block_x; // position for coeff_count ctx
+  scan = scan_table;
+  rl = readRunLevel_CABAC(currMB, dc_engine, context);
+  if (rl.level != 0)
+  {
+    scan += rl.run;
+    *cbp_blk |= 16 << bit_pos;
+    coeffs[*scan++] = rl.level;
+    read_ac_run_levels_4x4(currMB, ac_engine, context, scan, coeffs);
+  }
+
+  /* ---- fourth block: x advanced by 4 ---- */
+  coeffs = (int16_t *)(*blocks++);
+  currMB->subblock_x += 4; // position for coeff_count ctx
+  scan = scan_table;
+  rl = readRunLevel_CABAC(currMB, dc_engine, context);
+  if (rl.level != 0)
+  {
+    scan += rl.run;
+    *cbp_blk |= 32 << bit_pos;
+    coeffs[*scan++] = rl.level;
+    read_ac_run_levels_4x4(currMB, ac_engine, context, scan, coeffs);
+  }
+}
+
+/*
+ * Rescale, in place, every non-zero coefficient of four consecutive 4x4
+ * blocks: coeff = rshift_rnd_sf((coeff * InvLevelScale[row][col]) << qp_per, 4).
+ *
+ * The C implementation is compiled for debug builds and for every target
+ * other than 32-bit x86 (_M_IX86). The previous guard tested _M_IX64, which
+ * no compiler predefines, so non-x86 release builds were left with only the
+ * prototype. x86 release builds still see just the prototype; the definition
+ * is expected to come from elsewhere (not visible in this file section).
+ */
+#if defined(_DEBUG) || defined(_M_IX64) || !defined(_M_IX86)
+static void inv_level_coefficients(h264_short_block_t *blocks, const int (*InvLevelScale)[4], int qp_per)
+{
+  int b, j, i;
+
+  for (b = 0; b < 4; b++)
+  {
+    h264_short_block_row_t *block = blocks[b];
+    for (j = 0; j < 4; ++j)
+    {
+      for (i = 0; i < 4; ++i)
+      {
+        // zero coefficients are skipped
+        if (block[j][i])
+          block[j][i] = rshift_rnd_sf((block[j][i] * InvLevelScale[j][i]) << qp_per, 4);
+      }
+    }
+  }
+}
+#else
+void inv_level_coefficients(h264_short_block_t *blocks, const int (*InvLevelScale)[4], int qp_per);
+#endif
+
+/*
+ * Rescale, in place, the non-zero coefficients of four consecutive 4x4
+ * blocks, leaving position [0][0] of every block untouched.
+ */
+static void inv_level_coefficients_AC(h264_short_block_t *blocks, const int (*InvLevelScale)[4], int qp_per)
+{
+  int blk, pos;
+
+  for (blk = 0; blk < 4; ++blk)
+  {
+    h264_short_block_row_t *rows = blocks[blk];
+
+    // raster positions 1..15; position 0 is [0][0] and is skipped
+    for (pos = 1; pos < 16; ++pos)
+    {
+      const int r = pos >> 2;
+      const int c = pos & 3;
+
+      if (rows[r][c])
+        rows[r][c] = rshift_rnd_sf((rows[r][c] * InvLevelScale[r][c]) << qp_per, 4);
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the coefficients (run/level) of all 4x4 blocks of one colour
+*    plane of a MB from the NAL (CABAC Mode).
+*    Each of the four quadrants is read only when its bit is set in cbp;
+*    the coefficients are rescaled afterwards unless the MB is lossless.
+************************************************************************
+*/
+static void readCompCoeff4x4MB_CABAC(Macroblock *currMB, ColorPlane pl, int intra, int (*InvLevelScale4x4)[4], int qp_per, int cbp)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int is_i16 = IS_I16MB (currMB)? 1 : 0;
+  int64 *cbp_blk = &currMB->cbp_blk[pl];
+  h264_short_block_t *blocks = currSlice->cof4[pl];
+  int context;
+  int dequant;
+  int quad;
+
+  currMB->is_intra_block = intra;
+
+  // pick the context set of the plane; Intra16x16 MBs use the 16AC variants
+  if( pl == PLANE_Y || IS_INDEPENDENT(p_Vid) )
+    context = (IS_I16MB(currMB) ? LUMA_16AC: LUMA_4x4);
+  else if (pl == PLANE_U)
+    context = (IS_I16MB(currMB) ? CB_16AC: CB_4x4);
+  else
+    context = (IS_I16MB(currMB) ? CR_16AC: CR_4x4);
+
+  dequant = (currMB->is_lossless == FALSE);
+
+  for (quad = 0; quad < 4; ++quad)
+  {
+    h264_short_block_t *quad_blocks;
+    int quad_y, quad_x;
+
+    if ((cbp & (1 << quad)) == 0) // are there any coeff in this quadrant at all
+      continue;
+
+    quad_blocks = &blocks[quad << 2]; // four 4x4 blocks per quadrant
+    quad_y = (quad >> 1) << 3;        // 0, 0, 8, 8
+    quad_x = (quad & 1) << 3;         // 0, 8, 0, 8
+
+    if (is_i16)
+    {
+      readCompCoeff4x4SMB_I16MB_CABAC(currMB, context, quad_blocks, quad_y, quad_x, cbp_blk);
+      if (dequant)
+        inv_level_coefficients_AC(quad_blocks, InvLevelScale4x4, qp_per);
+    }
+    else
+    {
+      readCompCoeff4x4SMB_CABAC(currMB, context, quad_blocks, quad_y, quad_x, cbp_blk);
+      if (dequant)
+        inv_level_coefficients(quad_blocks, InvLevelScale4x4, qp_per);
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Get coefficients (run/level) of one 8x8 block
+*    from the NAL (CABAC Mode), lossless variant: the decoded levels
+*    are stored as read, without rescaling.
+*
+* \param currMB
+*    macroblock being decoded
+* \param pl
+*    colour plane; selects the coefficient buffer, cbp word and context
+* \param b8
+*    index of the 8x8 block (0..3); nothing is read unless bit b8 of
+*    currMB->cbp is set
+************************************************************************
+*/
+static void readCompCoeff8x8_CABAC_Lossless(Macroblock *currMB, ColorPlane pl, int b8)
+{
+  if (currMB->cbp & (1<<b8)) // are there any coefficients in the current block
+  {
+    VideoParameters *p_Vid = currMB->p_Vid;
+    Slice *currSlice = currMB->p_Slice;
+    const byte *partMap = assignSE2partition[currSlice->dp_mode];
+    DataPartition *dP;
+    short *tcoeffs;
+    RunLevel rl;
+    int scan;
+    int k;
+    int context;
+
+    // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position
+    const int64 cbp_mask = (int64) 51 << (4 * b8 - 2 * (b8 & 0x01));
+    int64 *cur_cbp = &currMB->cbp_blk[pl];
+
+    // select scan type
+    const byte *pos_scan8x8 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN8x8_1D : FIELD_SCAN8x8_1D;
+
+    currMB->is_intra_block = IS_INTRA(currMB);
+
+    // === set offset in current macroblock ===
+    tcoeffs = (short *)(currSlice->mb_rres8[pl][b8]);
+
+    currMB->subblock_x = (b8&0x01) << 3; // position for coeff_count ctx
+    currMB->subblock_y = (b8 >> 1) << 3; // position for coeff_count ctx
+
+    if (pl==PLANE_Y || IS_INDEPENDENT(p_Vid))
+      context = LUMA_8x8;
+    else if (pl==PLANE_U)
+      context = CB_8x8;
+    else
+      context = CR_8x8;
+
+    for (k = 0; k < 65; ++k)
+    {
+      //============ read =============
+      /*
+       * make distinction between INTRA and INTER coded
+       * luminance coefficients; only the first read uses the DC element type
+       */
+      int type = ((currMB->is_intra_block == 1)
+        ? (k==0 ? SE_LUM_DC_INTRA : SE_LUM_AC_INTRA)
+        : (k==0 ? SE_LUM_DC_INTER : SE_LUM_AC_INTER));
+
+      dP = &(currSlice->partArr[partMap[type]]);
+      rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), context);
+
+      //============ decode =============
+      if (rl.level == 0) /* leave if level == 0 */
+        break;
+
+      pos_scan8x8 += rl.run;
+      scan = *pos_scan8x8++;
+
+      *cur_cbp |= cbp_mask;
+
+      tcoeffs[scan] = rl.level;
+    }
+  }
+}
+
+
+/*
+ * Shared body of the intra and inter 8x8 CABAC readers.
+ * Reads one 8x8 block (if bit b8 of currMB->cbp is set), rescales every
+ * decoded level and stores it in currSlice->mb_rres8[pl][b8].
+ * 'intra' (1 or 0) is written to currMB->is_intra_block and selects both
+ * the scaling matrix and the syntax element types used for the partitions.
+ */
+static void readCompCoeff8x8_CABAC_scaled(Macroblock *currMB, ColorPlane pl, int b8, int intra)
+{
+  if (currMB->cbp & (1<<b8)) // are there any coefficients in the current block
+  {
+    VideoParameters *p_Vid = currMB->p_Vid;
+    Slice *currSlice = currMB->p_Slice;
+    const byte *partMap = assignSE2partition[currSlice->dp_mode];
+    int transform_pl = IS_INDEPENDENT(p_Vid) ? p_Vid->colour_plane_id : pl;
+    DecodingEnvironment *cabac;
+    short *tcoeffs;
+    RunLevel rl;
+    int scan;
+    int k;
+    int context;
+
+    // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position
+    const int64 cbp_mask = (int64) 51 << (4 * b8 - 2 * (b8 & 0x01));
+    int64 *cur_cbp = &currMB->cbp_blk[pl];
+
+    // select scan type
+    const byte *pos_scan8x8 = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN8x8_1D : FIELD_SCAN8x8_1D;
+
+    int qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[pl] ];
+    int qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[pl] ];
+
+    const int *InvLevelScale8x8 = intra ? currSlice->InvLevelScale8x8_Intra[transform_pl][qp_rem] : currSlice->InvLevelScale8x8_Inter[transform_pl][qp_rem];
+
+    currMB->is_intra_block = intra;
+
+    // === set offset in current macroblock ===
+    tcoeffs = (short *)(currSlice->mb_rres8[pl][b8]);
+
+    currMB->subblock_x = (b8&0x01) << 3; // position for coeff_count ctx
+    currMB->subblock_y = (b8 >> 1) << 3; // position for coeff_count ctx
+
+    if (pl==PLANE_Y || IS_INDEPENDENT(p_Vid))
+      context = LUMA_8x8;
+    else if (pl==PLANE_U)
+      context = CB_8x8;
+    else
+      context = CR_8x8;
+
+    // Read DC
+    cabac = &(currSlice->partArr[partMap[intra ? SE_LUM_DC_INTRA : SE_LUM_DC_INTER]].de_cabac);
+    rl = readRunLevel_CABAC(currMB, cabac, context);
+
+    //============ decode =============
+    if (rl.level != 0) /* leave if level == 0 */
+    {
+      *cur_cbp |= cbp_mask;
+
+      pos_scan8x8 += rl.run;
+
+      scan = *pos_scan8x8++;
+
+      tcoeffs[scan] = rshift_rnd_sf((rl.level * InvLevelScale8x8[scan]) << qp_per, 6); // dequantization
+
+      // AC coefficients
+      cabac = &(currSlice->partArr[partMap[intra ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER]].de_cabac);
+
+      // at most 64 further reads; a zero level ends the block
+      k = 64;
+      do
+      {
+        rl = readRunLevel_CABAC(currMB, cabac, context);
+
+        //============ decode =============
+        if (rl.level != 0) /* leave if level == 0 */
+        {
+          pos_scan8x8 += rl.run;
+
+          scan = *pos_scan8x8++;
+
+          tcoeffs[scan] = rshift_rnd_sf((rl.level * InvLevelScale8x8[scan]) << qp_per, 6); // dequantization
+        }
+        else
+          break;
+      } while (--k);
+    }
+  }
+}
+
+/* Read and rescale one 8x8 block of an intra macroblock (CABAC mode). */
+static void readCompCoeff8x8_CABAC_Intra(Macroblock *currMB, ColorPlane pl, int b8)
+{
+  readCompCoeff8x8_CABAC_scaled(currMB, pl, b8, 1);
+}
+
+/* Read and rescale one 8x8 block of an inter macroblock (CABAC mode). */
+static void readCompCoeff8x8_CABAC_Inter(Macroblock *currMB, ColorPlane pl, int b8)
+{
+  readCompCoeff8x8_CABAC_scaled(currMB, pl, b8, 0);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Get coefficients (run/level) of the four 8x8 blocks in a MB
+*    from the NAL (CABAC Mode).
+*    The per-block reader is chosen once: the lossless reader for
+*    lossless MBs, otherwise the intra or inter reader.
+************************************************************************
+*/
+static void readCompCoeff8x8MB_CABAC(Macroblock *currMB, ColorPlane pl)
+{
+  //======= 8x8 transform size & CABAC ========
+  void (*read_block)(Macroblock *, ColorPlane, int);
+  int b8;
+
+  if (currMB->is_lossless != FALSE)
+    read_block = readCompCoeff8x8_CABAC_Lossless;
+  else if (IS_INTRA(currMB))
+    read_block = readCompCoeff8x8_CABAC_Intra;
+  else
+    read_block = readCompCoeff8x8_CABAC_Inter;
+
+  for (b8 = 0; b8 < 4; ++b8)
+    read_block(currMB, pl, b8);
+}
+
+/*!
+************************************************************************
+* \brief
+* Get coefficients (run/level) of 4x4 blocks in a MB
+* from the NAL (CAVLC Mode)
+************************************************************************
+*/
+static void readCompCoeff4x4MB_CAVLC (Macroblock *currMB, ColorPlane pl, int (*InvLevelScale4x4)[4], int qp_per, int cbp, h264_4x4_byte nzcoeff)
+{
+ int block_y, block_x, b8;
+ int i, j, k;
+ int i0, j0;
+ __declspec(align(32)) int levarr[16], runarr[16];
+ int numcoeff;
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ const byte (*pos_scan4x4)[2] = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN : FIELD_SCAN;
+ const byte *pos_scan_4x4 = pos_scan4x4[0];
+ int start_scan = IS_I16MB(currMB) ? 1 : 0;
+ int64 *cur_cbp = &currMB->cbp_blk[pl];
+ int coef_ctr, cur_context;
+
+ memzero64(levarr);
+ memzero64(runarr);
+
+ if (IS_I16MB(currMB))
+ {
+ if (pl == PLANE_Y)
+ cur_context = LUMA_INTRA16x16AC;
+ else if (pl == PLANE_U)
+ cur_context = CB_INTRA16x16AC;
+ else
+ cur_context = CR_INTRA16x16AC;
+ }
+ else
+ {
+ if (pl == PLANE_Y)
+ cur_context = LUMA;
+ else if (pl == PLANE_U)
+ cur_context = CB;
+ else
+ cur_context = CR;
+ }
+
+ if (currMB->is_lossless == FALSE)
+ {
+ for (block_y = 0; block_y < 4; block_y += 2) /* all modes */
+ {
+ for (block_x = 0; block_x < 4; block_x += 2)
+ {
+ b8 = (block_y + (block_x >> 1));
+
+ if (cbp & (1 << b8)) // test if the block contains any coefficients
+ {
+ for (j=block_y << 2; j < (block_y + 2) << 2; j += BLOCK_SIZE)
+ {
+ for (i=block_x << 2; i < (block_x + 2) << 2; i += BLOCK_SIZE)
+ {
+ readCoeff4x4_CAVLC(currMB, cur_context, i >> 2, j >> 2, levarr, runarr, &numcoeff);
+ pos_scan_4x4 = pos_scan4x4[start_scan];
+
+ for (k = 0; k < numcoeff; ++k)
+ {
+ if (levarr[k] != 0)
+ {
+ pos_scan_4x4 += (runarr[k] << 1);
+
+ i0 = *pos_scan_4x4++;
+ j0 = *pos_scan_4x4++;
+
+ // inverse quant for 4x4 transform only
+ *cur_cbp |= (int64) 1 << (j + (i >> 2));
+
+ currSlice->cof4[pl][cof4_pos_to_subblock[j>>2][i>>2]][j0][i0]= rshift_rnd_sf((levarr[k] * InvLevelScale4x4[j0][i0])<<qp_per, 4);
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ for (j=0; j < 2; j++)
+ {
+ for (i=0;i<2;i++)
+ {
+ nzcoeff[block_y+j][block_x+i]=0;
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ for (block_y=0; block_y < 4; block_y += 2) /* all modes */
+ {
+ for (block_x=0; block_x < 4; block_x += 2)
+ {
+ b8 = 2*(block_y>>1) + (block_x>>1);
+
+ if (cbp & (1<<b8)) /* are there any coeff in current block at all */
+ {
+ for (j=block_y; j < block_y+2; ++j)
+ {
+ for (i=block_x; i < block_x+2; ++i)
+ {
+ readCoeff4x4_CAVLC(currMB, cur_context, i, j, levarr, runarr, &numcoeff);
+
+ coef_ctr = start_scan - 1;
+
+ for (k = 0; k < numcoeff; ++k)
+ {
+ if (levarr[k] != 0)
+ {
+ coef_ctr += runarr[k]+1;
+
+ i0=pos_scan4x4[coef_ctr][0];
+ j0=pos_scan4x4[coef_ctr][1];
+
+ *cur_cbp |= (int64) 1 << ((j<<2) + i);
+ currSlice->cof4[pl][cof4_pos_to_subblock[j>>2][i>>2]][j0][i0]= levarr[k];
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ for (j=0; j < 2; j++)
+ {
+ for (i=0;i<2;i++)
+ {
+ nzcoeff[block_y+j][block_x+i]=0;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Get coefficients (run/level) of 8x8 blocks in a MB
+*    from the NAL (CAVLC Mode)
+*
+*    Each 8x8 block flagged in cbp is read as four 4x4 CAVLC blocks.
+*    Coefficient n of 4x4 block s (s = 0..3) is placed at position
+*    n*4 + s of the 8x8 scan table. Non-lossless macroblocks store the
+*    scaled value, lossless macroblocks the raw level. For 8x8 blocks
+*    not flagged in cbp the matching nzcoeff entries are cleared.
+*
+* \param currMB            macroblock whose coefficients are read
+* \param pl                colour plane being decoded
+* \param InvLevelScale8x8  inverse level scale table, indexed by scan value
+* \param qp_per            left shift applied after scaling
+* \param cbp               coded block pattern; bit b8 marks 8x8 block b8
+* \param nzcoeff           per-4x4 table, zeroed for uncoded 8x8 blocks
+************************************************************************
+*/
+static void readCompCoeff8x8MB_CAVLC (Macroblock *currMB, ColorPlane pl, const int *InvLevelScale8x8, int qp_per, int cbp, h264_4x4_byte nzcoeff)
+{
+  __declspec(align(32)) int levarr[16] = {0}, runarr[16] = {0};
+  int numcoeff;
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const byte *scan_table = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN8x8_1D : FIELD_SCAN8x8_1D;
+  const int is_i16 = IS_I16MB(currMB);
+  const int first_coef = is_i16 ? 1 : 0;
+  const int dequantize = (currMB->is_lossless == FALSE);
+  int64 *cbp_bits = &currMB->cbp_blk[pl];
+  int context;
+  int y8, x8;
+
+  // CAVLC block context: depends on the colour plane and on Intra16x16
+  if (pl == PLANE_Y)
+    context = is_i16 ? LUMA_INTRA16x16AC : LUMA;
+  else if (pl == PLANE_U)
+    context = is_i16 ? CB_INTRA16x16AC : CB;
+  else
+    context = is_i16 ? CR_INTRA16x16AC : CR;
+
+  for (y8 = 0; y8 < 4; y8 += 2) /* all modes */
+  {
+    for (x8 = 0; x8 < 4; x8 += 2)
+    {
+      const int blk8 = y8 + (x8 >> 1);
+      short *coefficients = (short *)(currSlice->mb_rres8[pl][blk8]);
+      int sub;
+
+      if ((cbp & (1 << blk8)) == 0)
+      {
+        // no coefficients in this 8x8 block: clear its two nzcoeff rows
+        memset(&nzcoeff[y8][x8], 0, 2 * sizeof(byte));
+        memset(&nzcoeff[y8 + 1][x8], 0, 2 * sizeof(byte));
+        continue;
+      }
+
+      // sub walks the four 4x4 blocks in raster order: (0,0) (1,0) (0,1) (1,1)
+      for (sub = 0; sub < 4; ++sub)
+      {
+        int coef_ctr = first_coef - 1;
+        int n;
+
+        readCoeff4x4_CAVLC(currMB, context, x8 + (sub & 1), y8 + (sub >> 1), levarr, runarr, &numcoeff);
+
+        for (n = 0; n < numcoeff; ++n)
+        {
+          int scan;
+
+          if (levarr[n] == 0)
+            continue;
+
+          coef_ctr += runarr[n] + 1;
+
+          // do same as CABAC for deblocking: any coeff in the 8x8 marks all the 4x4s
+          // as containing coefficients
+          *cbp_bits |= 51 << ((y8 << 2) + x8);
+
+          scan = scan_table[(coef_ctr << 2) + sub];
+
+          if (dequantize)
+          {
+            coefficients[scan] = rshift_rnd_sf((levarr[n] * InvLevelScale8x8[scan])<<qp_per, 6); // dequantization
+          }
+          else
+          {
+            coefficients[scan] = levarr[n];
+          }
+        }
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Data partitioning: inherit the dpl_flag of the left and upper
+*    neighbouring macroblocks.
+*
+*    Applies to inter macroblocks, and to intra macroblocks when the
+*    active PPS does not set constrained_intra_pred_flag. For those,
+*    the dpl_flag of every available neighbour is OR-ed into
+*    currMB->dpl_flag. Other macroblocks are left untouched.
+*
+* \param currMB  macroblock whose dpl_flag may be updated
+************************************************************************
+*/
+static void check_dp_neighbors (Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  PixelPos left_nb, up_nb;
+
+  // neither inter nor unconstrained intra: nothing to inherit
+  if (!IS_INTER (currMB) && !(IS_INTRA (currMB) && !(p_Vid->active_pps->constrained_intra_pred_flag)))
+    return;
+
+  p_Vid->getNeighbourLeft(currMB, p_Vid->mb_size[1], &left_nb);
+  p_Vid->getNeighbourUp(currMB, p_Vid->mb_size[1], &up_nb);
+
+  if (left_nb.available)
+    currMB->dpl_flag |= p_Vid->mb_data[left_nb.mb_addr].dpl_flag;
+
+  if (up_nb.available)
+    currMB->dpl_flag |= p_Vid->mb_data[up_nb.mb_addr].dpl_flag;
+}
+
+
+/*!
+************************************************************************
+* \brief
+* Get coded block pattern and coefficients (run/level)
+* from the NAL (CABAC mode)
+*
+* Steps, in bitstream order:
+* - non-Intra16x16 MBs: read the CBP, optionally the
+* transform_size_8x8_flag, and (if cbp != 0) the delta quant
+* - Intra16x16 MBs: read the delta quant and the luma DC levels
+* - read the luma coefficients (4x4 or 8x8 reader)
+* - YUV444 without independent planes: read Cb/Cr DC and
+* coefficients with the same readers as luma
+* - other chroma formats except YUV400: read chroma DC (cbp > 15)
+* and chroma AC (cbp > 31)
+*
+* With data partitioning (currSlice->dp_mode), cbp is forced to 0
+* when dpl_flag ends up set for this macroblock.
+*
+* \param currMB macroblock whose CBP and coefficients are read
+************************************************************************
+*/
+static void read_CBP_and_coeffs_from_NAL_CABAC(Macroblock *currMB)
+{
+ int i,j,k;
+ int cbp;
+ SyntaxElement currSE;
+ DataPartition *dP = NULL;
+ Slice *currSlice = currMB->p_Slice;
+ const byte *partMap = assignSE2partition[currSlice->dp_mode];
+ int coef_ctr, i0, j0, b8;
+ int ll;
+ RunLevel rl;
+
+ int qp_per, qp_rem;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int intra = IS_INTRA (currMB);
+ // smb: inter MB in an SP slice, or SI4MB in an SI slice
+ int smb = ((p_Vid->type==SP_SLICE) && !intra) || (p_Vid->type == SI_SLICE && currMB->mb_type == SI4MB);
+
+ int uv;
+ int qp_per_uv[2];
+ int qp_rem_uv[2];
+
+
+ int temp[4];
+
+ int b4;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ // first index into the cofuv_blk_* / subblk_offset_* tables
+ int yuv = dec_picture->chroma_format_idc - 1;
+ int m6[4];
+
+ int need_transform_size_flag;
+
+ int (*InvLevelScale4x4)[4] = NULL;
+
+ // select scan type
+ const byte (*pos_scan4x4)[2] = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN : FIELD_SCAN;
+ const byte *pos_scan4x4_1d = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_1D : FIELD_SCAN_1D;
+ const byte *pos_scan4x4_dc = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN_DC : FIELD_SCAN_DC;
+ const byte *pos_scan_4x4;
+
+ // QPI
+ //init constants for every chroma qp offset
+ if (dec_picture->chroma_format_idc != YUV400)
+ {
+ for (i=0; i<2; ++i)
+ {
+ qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ];
+ qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ];
+ }
+ }
+
+ // read CBP if not new intra mode
+ if (!IS_I16MB (currMB))
+ {
+ //===== C B P =====
+ //---------------------
+ int type = (currMB->mb_type == I4MB || currMB->mb_type == SI4MB || currMB->mb_type == I8MB)
+ ? SE_CBP_INTRA
+ : SE_CBP_INTER;
+
+ dP = &(currSlice->partArr[partMap[type]]);
+
+ currMB->cbp = cbp = readCBP_CABAC(currMB, &(dP->de_cabac));
+
+ TRACE_STRING("coded_block_pattern");
+
+
+ //============= Transform size flag for INTER MBs =============
+ //-------------------------------------------------------------
+ need_transform_size_flag = (((currMB->mb_type >= 1 && currMB->mb_type <= 3)||
+ (IS_DIRECT(currMB) && p_Vid->active_sps->direct_8x8_inference_flag) ||
+ (currMB->NoMbPartLessThan8x8Flag))
+ && currMB->mb_type != I8MB && currMB->mb_type != I4MB
+ && (currMB->cbp&15)
+ && p_Vid->Transform8x8Mode);
+
+ if (need_transform_size_flag)
+ {
+ dP = &(currSlice->partArr[partMap[SE_HEADER]]);
+ TRACE_STRING("transform_size_8x8_flag");
+
+ // read CABAC transform_size_8x8_flag
+ currMB->luma_transform_size_8x8_flag = readMB_transform_size_flag_CABAC(currMB, &(dP->de_cabac));
+ }
+
+ //===== DQUANT =====
+ //----------------------
+ // Delta quant only if nonzero coeffs
+ if (cbp !=0)
+ {
+ read_delta_quant_CABAC(&currSE, dP, currMB, partMap, (!intra) ? SE_DELTA_QUANT_INTER : SE_DELTA_QUANT_INTRA);
+
+ if (currSlice->dp_mode)
+ {
+ if (!intra && currSlice->dpC_NotPresent )
+ currMB->dpl_flag = 1;
+
+ if( intra && currSlice->dpB_NotPresent )
+ {
+ currMB->ei_flag = 1;
+ currMB->dpl_flag = 1;
+ }
+
+ // check for prediction from neighbours
+ check_dp_neighbors (currMB);
+ if (currMB->dpl_flag)
+ {
+ cbp = 0;
+ currMB->cbp = cbp;
+ }
+ }
+ }
+ }
+ else
+ {
+ // Intra16x16: no CBP is read here, reuse the value already stored in the MB
+ cbp = currMB->cbp;
+ }
+
+ if (IS_I16MB (currMB)) // read DC coeffs for new intra modes
+ {
+ // NOTE(review): dP is still NULL at this point; presumably
+ // read_delta_quant_CABAC selects its own partition - confirm
+ read_delta_quant_CABAC(&currSE, dP, currMB, partMap, SE_DELTA_QUANT_INTRA);
+
+ macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y);
+
+ if (currSlice->dp_mode)
+ {
+ if (currSlice->dpB_NotPresent)
+ {
+ currMB->ei_flag = 1;
+ currMB->dpl_flag = 1;
+ }
+ check_dp_neighbors (currMB);
+ if (currMB->dpl_flag)
+ {
+ currMB->cbp = cbp = 0;
+ }
+ }
+
+ if (!currMB->dpl_flag)
+ {
+ pos_scan_4x4 = pos_scan4x4_dc;
+
+ {
+ dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
+
+ currMB->is_intra_block = 1;
+
+ // at most 17 run/level reads; a zero level ends the list
+ for(k = 0; k < 17 ; k++)
+ {
+ rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), LUMA_16DC);
+
+ if (rl.level != 0) /* leave if level == 0 */
+ {
+ // skip rl.run scan positions; the scan entry read next selects the cof4 sub-block
+ pos_scan_4x4 += rl.run;
+ currSlice->cof4[0][*pos_scan_4x4++][0][0] = rl.level;// add new intra DC coeff
+ }
+ else
+ break;
+ }
+
+ }
+
+ if(currMB->is_lossless == FALSE)
+ itrans_2(currMB, (ColorPlane) p_Vid->colour_plane_id);// transform new intra DC
+ }
+ }
+
+ update_qp(currMB, p_Vid->qp);
+
+ qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ];
+ qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ];
+
+ //init quant parameters for chroma
+ // (repeated after update_qp; the same values were already computed at the top of the function)
+ if (dec_picture->chroma_format_idc != YUV400)
+ {
+ for(i=0; i < 2; ++i)
+ {
+ qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ];
+ qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ];
+ }
+ }
+
+ InvLevelScale4x4 = intra? currSlice->InvLevelScale4x4_Intra[p_Vid->colour_plane_id][qp_rem] : currSlice->InvLevelScale4x4_Inter[p_Vid->colour_plane_id][qp_rem];
+
+ // luma coefficients
+ {
+ //======= Other Modes & CABAC ========
+ //------------------------------------
+ if (cbp)
+ {
+ if(currMB->luma_transform_size_8x8_flag)
+ {
+ //======= 8x8 transform size & CABAC ========
+ readCompCoeff8x8MB_CABAC (currMB, PLANE_Y);
+ }
+ else
+ {
+ readCompCoeff4x4MB_CABAC (currMB, PLANE_Y, intra, InvLevelScale4x4, qp_per, cbp);
+ }
+ }
+ }
+
+ if ( p_Vid->active_sps->chroma_format_idc==YUV444 && !IS_INDEPENDENT(p_Vid) )
+ {
+ // YUV444, planes not independent: Cb and Cr go through the same readers as luma
+ for (uv = 0; uv < 2; ++uv )
+ {
+ /*----------------------16x16DC Luma_Add----------------------*/
+ if (IS_I16MB (currMB)) // read DC coeffs for new intra modes
+ {
+ macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y);
+
+ {
+ int context;
+ dP = &(currSlice->partArr[partMap[SE_LUM_DC_INTRA]]);
+
+ // NOTE(review): this loop is only entered when !IS_INDEPENDENT(p_Vid),
+ // so the LUMA_16DC arm looks unreachable here - confirm
+ if( IS_INDEPENDENT(p_Vid) )
+ context = LUMA_16DC;
+ else
+ context = (uv==0) ? CB_16DC : CR_16DC;
+
+ currMB->is_intra_block = 1;
+
+ coef_ctr = -1;
+
+ for(k=0;k<17;++k)
+ {
+ rl = readRunLevel_CABAC(currMB, &dP->de_cabac, context);
+
+ if (rl.level != 0) // leave if level == 0
+ {
+ coef_ctr += rl.run + 1;
+ currSlice->cof4[uv + 1][pos_scan4x4_1d[coef_ctr]][0][0] = rl.level;
+ }
+ else
+ break;
+ } //k loop
+ } // end of DC read block
+
+ if(currMB->is_lossless == FALSE)
+ {
+ itrans_2(currMB, (ColorPlane) (uv + 1)); // transform new intra DC
+ }
+ } //IS_I16MB
+
+ update_qp(currMB, p_Vid->qp);
+
+ // NOTE(review): qp_per/qp_rem are recomputed here but the statements below
+ // only use qp_per_uv/qp_rem_uv - confirm they are not needed
+ qp_per = p_Vid->qp_per_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ];
+ qp_rem = p_Vid->qp_rem_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ];
+
+ //init constants for every chroma qp offset
+ qp_per_uv[uv] = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ];
+ qp_rem_uv[uv] = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ];
+
+ InvLevelScale4x4 = intra? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]];
+
+ {
+ if (cbp)
+ {
+ if(currMB->luma_transform_size_8x8_flag)
+ {
+ //======= 8x8 transform size & CABAC ========
+ readCompCoeff8x8MB_CABAC(currMB, (ColorPlane) (PLANE_U + uv));
+ }
+ else //4x4
+ {
+ readCompCoeff4x4MB_CABAC(currMB, (ColorPlane) (PLANE_U + uv), intra, InvLevelScale4x4, qp_per_uv[uv], cbp);
+ }
+ }
+ }
+ }
+ } //444
+ else if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444))
+ {
+ //========================== CHROMA DC ============================
+ //-----------------------------------------------------------------
+ // chroma DC coeff
+ if(cbp>15)
+ {
+ if (dec_picture->chroma_format_idc == YUV420)
+ {
+ // ll takes the values 0 and 2, giving uv = 0 and 1
+ for (ll=0;ll<3;ll+=2)
+ {
+ uv = ll>>1;
+
+ InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]];
+ //===================== CHROMA DC YUV420 ======================
+ memzero16(&currSlice->cofu[0]);
+ coef_ctr=-1;
+
+ {
+ int type = (intra ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER);
+
+ currMB->is_intra_block = intra;
+ currMB->is_v_block = ll;
+
+ dP = &(currSlice->partArr[partMap[type]]);
+
+ for(k = 0; k < (p_Vid->num_cdc_coeff + 1);++k)
+ {
+ rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), CHROMA_DC);
+
+ if (rl.level != 0)
+ {
+ currMB->cbp_blk[0] |= 0xf0000 << (ll<<1) ;
+ coef_ctr += rl.run + 1;
+
+ // Bug: currSlice->cofu has only 4 entries, hence coef_ctr MUST be <4 (which is
+ // caught by the assert(). If it is bigger than 4, it starts patching the
+ // p_Vid->predmode pointer, which leads to bugs later on.
+ //
+ // This assert() should be left in the code, because it captures a very likely
+ // bug early when testing in error prone environments (or when testing NAL
+ // functionality).
+ assert (coef_ctr < p_Vid->num_cdc_coeff);
+ // the &3 mask keeps the index inside cofu[0..3] when assert() is compiled out
+ currSlice->cofu[coef_ctr&3]=rl.level;
+ }
+ else
+ break;
+ }
+ }
+
+ if (smb || (currMB->is_lossless == TRUE)) // check to see if MB type is SPred or SIntra4x4
+ {
+ // store the DC levels as read: no 2x2 transform, no scaling
+ currSlice->cof4[uv + 1][0][0][0] = currSlice->cofu[0];
+ currSlice->cof4[uv + 1][1][0][0] = currSlice->cofu[1];
+ currSlice->cof4[uv + 1][2][0][0] = currSlice->cofu[2];
+ currSlice->cof4[uv + 1][3][0][0] = currSlice->cofu[3];
+ }
+ else
+ {
+ ihadamard2x2(currSlice->cofu, temp);
+
+ currSlice->cof4[uv + 1][0][0][0] = (((temp[0] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ currSlice->cof4[uv + 1][1][0][0] = (((temp[1] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ currSlice->cof4[uv + 1][2][0][0] = (((temp[2] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ currSlice->cof4[uv + 1][3][0][0] = (((temp[3] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ }
+ }
+ }
+ else if (dec_picture->chroma_format_idc == YUV422)
+ {
+ for (ll=0;ll<3;ll+=2)
+ {
+ // note: shadows the function-level InvLevelScale4x4 inside this loop body
+ int (*InvLevelScale4x4)[4] = NULL;
+ uv = ll>>1;
+ {
+ h264_short_block_t *imgcof = currSlice->cof4[uv + 1];
+ int m3[2][4] = {{0,0,0,0},{0,0,0,0}};
+ int m4[2][4] = {{0,0,0,0},{0,0,0,0}};
+ int qp_per_uv_dc = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only
+ int qp_rem_uv_dc = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only
+ if (intra)
+ InvLevelScale4x4 = currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv_dc];
+ else
+ InvLevelScale4x4 = currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv_dc];
+
+
+ //===================== CHROMA DC YUV422 ======================
+ {
+ coef_ctr=-1;
+ for(k=0;k<9;++k)
+ {
+ int type = (intra ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER);
+ currMB->is_intra_block = intra;
+ currMB->is_v_block = ll;
+
+ dP = &(currSlice->partArr[partMap[type]]);
+
+ rl = readRunLevel_CABAC(currMB, &dP->de_cabac, CHROMA_DC_2x4);
+
+ if (rl.level != 0)
+ {
+ currMB->cbp_blk[0] |= ((int64)0xff0000) << (ll<<2) ;
+ coef_ctr += rl.run + 1;
+ assert (coef_ctr < p_Vid->num_cdc_coeff);
+ i0=SCAN_YUV422[coef_ctr][0];
+ j0=SCAN_YUV422[coef_ctr][1];
+
+ m3[i0][j0]=rl.level;
+ }
+ else
+ break;
+ }
+ }
+ // inverse CHROMA DC YUV422 transform
+ // horizontal
+ if(currMB->is_lossless == FALSE)
+ {
+ m4[0][0] = m3[0][0] + m3[1][0];
+ m4[0][1] = m3[0][1] + m3[1][1];
+ m4[0][2] = m3[0][2] + m3[1][2];
+ m4[0][3] = m3[0][3] + m3[1][3];
+
+ m4[1][0] = m3[0][0] - m3[1][0];
+ m4[1][1] = m3[0][1] - m3[1][1];
+ m4[1][2] = m3[0][2] - m3[1][2];
+ m4[1][3] = m3[0][3] - m3[1][3];
+
+ // second pass: 4-point butterfly along each m4 row
+ for (i = 0; i < 2; ++i)
+ {
+ m6[0] = m4[i][0] + m4[i][2];
+ m6[1] = m4[i][0] - m4[i][2];
+ m6[2] = m4[i][1] - m4[i][3];
+ m6[3] = m4[i][1] + m4[i][3];
+
+ imgcof[cof4_pos_to_subblock[0][i]][0][0] = m6[0] + m6[3];
+ imgcof[cof4_pos_to_subblock[1][i]][0][0] = m6[1] + m6[2];
+ imgcof[cof4_pos_to_subblock[2][i]][0][0] = m6[1] - m6[2];
+ imgcof[cof4_pos_to_subblock[3][i]][0][0]= m6[0] - m6[3];
+ }//for (i=0;i<2;++i)
+ }
+ else
+ {
+ // lossless: copy the DC levels without the inverse transform
+ for(j=0;j<4;++j)
+ {
+ for(i=0;i<2;++i)
+ {
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][0][0] = m3[i][j];
+ }
+ }
+ }
+
+ // scale the DC value of every chroma 4x4 block (runs for the lossless case as well)
+ for(j = 0;j < p_Vid->mb_cr_size_y; j += BLOCK_SIZE)
+ {
+ for(i=0;i < p_Vid->mb_cr_size_x;i+=BLOCK_SIZE)
+ {
+ imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] = rshift_rnd_sf((imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] * InvLevelScale4x4[0][0]) << qp_per_uv_dc, 6);
+ }
+ }
+ }
+ }//for (ll=0;ll<3;ll+=2)
+ }//else if (dec_picture->chroma_format_idc == YUV422)
+ }
+
+ //========================== CHROMA AC ============================
+ //-----------------------------------------------------------------
+ // chroma AC coeff, all zero from start_scan
+ if (cbp<=31)
+ {
+ }
+ else
+ {
+ {
+ int type;
+ currMB->is_intra_block = intra;
+ type = (intra ? SE_CHR_AC_INTRA : SE_CHR_AC_INTER);
+
+ dP = &(currSlice->partArr[partMap[type]]);
+
+
+ if(currMB->is_lossless == FALSE)
+ {
+ for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8)
+ {
+ // uv becomes 1 once b8 passes the first num_uv_blocks blocks
+ currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 ));
+ InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]];
+
+ for (b4 = 0; b4 < 4; ++b4)
+ {
+ // flat view of the 4x4 scale table, indexed by scan position below
+ int *scale = &InvLevelScale4x4[0][0];
+ i = cofuv_blk_x[yuv][b8][b4];
+ j = cofuv_blk_y[yuv][b8][b4];
+
+ currMB->subblock_y = subblk_offset_y[yuv][b8][b4];
+ currMB->subblock_x = subblk_offset_x[yuv][b8][b4];
+
+ // AC scan starts at entry 1 of the 1-D scan table
+ pos_scan_4x4 = &pos_scan4x4_1d[1];
+ for(k = 0; k < 16;++k)
+ {
+ rl = readRunLevel_CABAC(currMB, &(dP->de_cabac), CHROMA_AC);
+
+ if (rl.level != 0)
+ {
+ byte position;
+ currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4];
+ pos_scan_4x4 += rl.run;
+ position = *pos_scan_4x4++;
+
+ ((int16_t *)currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]])[position] = rshift_rnd_sf((rl.level * scale[position])<<qp_per_uv[uv], 4);
+ }
+ else
+ break;
+ } //for(k = 0; k < 16; ++k)
+ }
+ }
+ }
+ else
+ {
+ // lossless: same traversal, levels stored without scaling
+ for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8)
+ {
+ currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 ));
+
+ for (b4=0; b4 < 4; ++b4)
+ {
+ i = cofuv_blk_x[yuv][b8][b4];
+ j = cofuv_blk_y[yuv][b8][b4];
+
+ pos_scan_4x4 = &pos_scan4x4_1d[1];
+
+ currMB->subblock_y = subblk_offset_y[yuv][b8][b4];
+ currMB->subblock_x = subblk_offset_x[yuv][b8][b4];
+
+ for(k=0;k<16;++k)
+ {
+ rl = readRunLevel_CABAC(currMB, &dP->de_cabac, CHROMA_AC);
+
+ if (rl.level != 0)
+ {
+ currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4];
+ pos_scan_4x4 += rl.run;
+
+ ((int16_t *)currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]])[*pos_scan_4x4++] = rl.level;
+ }
+ else
+ break;
+ }
+ }
+ }
+ } // else (lossless)
+ } // block declaring 'type'
+ } // else (cbp > 31)
+ } // else if (chroma format is neither YUV400 nor YUV444)
+}
+
+/*!
+************************************************************************
+* \brief
+* Get coded block pattern and coefficients (run/level)
+* from the NAL
+************************************************************************
+*/
+static void read_CBP_and_coeffs_from_NAL_CAVLC(Macroblock *currMB)
+{
+ int i,j,k;
+ int level;
+ int mb_nr = currMB->mbAddrX;
+ int cbp;
+ SyntaxElement currSE;
+ DataPartition *dP = NULL;
+ Slice *currSlice = currMB->p_Slice;
+ const byte *partMap = assignSE2partition[currSlice->dp_mode];
+ int coef_ctr, i0, j0, b8;
+ int ll;
+ __declspec(align(32)) int levarr[16], runarr[16];
+ int numcoeff;
+
+ int qp_per, qp_rem;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int smb = ((p_Vid->type==SP_SLICE) && IS_INTER (currMB)) || (p_Vid->type == SI_SLICE && currMB->mb_type == SI4MB);
+
+ int uv;
+ int qp_per_uv[2];
+ int qp_rem_uv[2];
+
+ int intra = IS_INTRA (currMB);
+ int temp[4];
+
+ int b4;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ int yuv = dec_picture->chroma_format_idc - 1;
+ int m6[4];
+
+ int need_transform_size_flag;
+
+ int (*InvLevelScale4x4)[4] = NULL;
+ const int *InvLevelScale8x8 = NULL;
+ // select scan type
+ const byte (*pos_scan4x4)[2] = ((p_Vid->structure == FRAME) && (!currMB->mb_field)) ? SNGL_SCAN : FIELD_SCAN;
+ const byte *pos_scan_4x4 = pos_scan4x4[0];
+
+ // QPI
+ //init constants for every chroma qp offset
+ if (dec_picture->chroma_format_idc != YUV400)
+ {
+ for (i=0; i<2; ++i)
+ {
+ qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ];
+ qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ];
+ }
+ }
+
+ // read CBP if not new intra mode
+ if (!IS_I16MB (currMB))
+ {
+ //===== C B P =====
+ //---------------------
+ int type = (currMB->mb_type == I4MB || currMB->mb_type == SI4MB || currMB->mb_type == I8MB)
+ ? SE_CBP_INTRA
+ : SE_CBP_INTER;
+
+ dP = &(currSlice->partArr[partMap[type]]);
+
+ currSE.mapping = (currMB->mb_type == I4MB || currMB->mb_type == SI4MB || currMB->mb_type == I8MB)
+ ? currSlice->linfo_cbp_intra
+ : currSlice->linfo_cbp_inter;
+
+ TRACE_STRING("coded_block_pattern");
+ readSyntaxElement_UVLC(&currSE, dP);
+ currMB->cbp = cbp = currSE.value1;
+
+
+ //============= Transform size flag for INTER MBs =============
+ //-------------------------------------------------------------
+ need_transform_size_flag = (((currMB->mb_type >= 1 && currMB->mb_type <= 3)||
+ (IS_DIRECT(currMB) && p_Vid->active_sps->direct_8x8_inference_flag) ||
+ (currMB->NoMbPartLessThan8x8Flag))
+ && currMB->mb_type != I8MB && currMB->mb_type != I4MB
+ && (currMB->cbp&15)
+ && p_Vid->Transform8x8Mode);
+
+ if (need_transform_size_flag)
+ {
+ dP = &(currSlice->partArr[partMap[SE_HEADER]]);
+ TRACE_STRING("transform_size_8x8_flag");
+
+ // read CAVLC transform_size_8x8_flag
+ currMB->luma_transform_size_8x8_flag = (Boolean) readSyntaxElement_FLC(dP->bitstream, 1);
+ }
+
+ //===== DQUANT =====
+ //----------------------
+ // Delta quant only if nonzero coeffs
+ if (cbp !=0)
+ {
+ read_delta_quant_CAVLC(&currSE, dP, currMB, partMap, (IS_INTER (currMB)) ? SE_DELTA_QUANT_INTER : SE_DELTA_QUANT_INTRA);
+
+ if (currSlice->dp_mode)
+ {
+ if (IS_INTER (currMB) && currSlice->dpC_NotPresent )
+ currMB->dpl_flag = 1;
+
+ if( intra && currSlice->dpB_NotPresent )
+ {
+ currMB->ei_flag = 1;
+ currMB->dpl_flag = 1;
+ }
+
+ // check for prediction from neighbours
+ check_dp_neighbors (currMB);
+ if (currMB->dpl_flag)
+ {
+ cbp = 0;
+ currMB->cbp = cbp;
+ }
+ }
+ }
+ }
+ else
+ {
+ cbp = currMB->cbp;
+ }
+
+ if (IS_I16MB (currMB)) // read DC coeffs for new intra modes
+ {
+ read_delta_quant_CAVLC(&currSE, dP, currMB, partMap, SE_DELTA_QUANT_INTRA);
+
+ macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y);
+
+ if (currSlice->dp_mode)
+ {
+ if (currSlice->dpB_NotPresent)
+ {
+ currMB->ei_flag = 1;
+ currMB->dpl_flag = 1;
+ }
+ check_dp_neighbors (currMB);
+ if (currMB->dpl_flag)
+ {
+ currMB->cbp = cbp = 0;
+ }
+ }
+
+ if (!currMB->dpl_flag)
+ {
+ pos_scan_4x4 = pos_scan4x4[0];
+
+ readCoeff4x4_CAVLC(currMB, LUMA_INTRA16x16DC, 0, 0, levarr, runarr, &numcoeff);
+
+ for(k = 0; k < numcoeff; ++k)
+ {
+ if (levarr[k] != 0) // leave if level == 0
+ {
+ pos_scan_4x4 += 2 * runarr[k];
+
+ i0 = (*pos_scan_4x4++);
+ j0 = (*pos_scan_4x4++);
+
+ currSlice->cof4[0][cof4_pos_to_subblock[j0][i0]][0][0] = levarr[k];// add new intra DC coeff
+ }
+ }
+
+
+ if(currMB->is_lossless == FALSE)
+ itrans_2(currMB, (ColorPlane) p_Vid->colour_plane_id);// transform new intra DC
+ }
+ }
+
+ update_qp(currMB, p_Vid->qp);
+
+ qp_per = p_Vid->qp_per_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ];
+ qp_rem = p_Vid->qp_rem_matrix[ currMB->qp_scaled[p_Vid->colour_plane_id] ];
+
+ //init quant parameters for chroma
+ if (dec_picture->chroma_format_idc != YUV400)
+ {
+ for(i=0; i < 2; ++i)
+ {
+ qp_per_uv[i] = p_Vid->qp_per_matrix[ currMB->qp_scaled[i + 1] ];
+ qp_rem_uv[i] = p_Vid->qp_rem_matrix[ currMB->qp_scaled[i + 1] ];
+ }
+ }
+
+ InvLevelScale4x4 = intra? currSlice->InvLevelScale4x4_Intra[p_Vid->colour_plane_id][qp_rem] : currSlice->InvLevelScale4x4_Inter[p_Vid->colour_plane_id][qp_rem];
+ InvLevelScale8x8 = intra? currSlice->InvLevelScale8x8_Intra[p_Vid->colour_plane_id][qp_rem] : currSlice->InvLevelScale8x8_Inter[p_Vid->colour_plane_id][qp_rem];
+
+ // luma coefficients
+ if (cbp)
+ {
+ if (!currMB->luma_transform_size_8x8_flag) // 4x4 transform
+ {
+ readCompCoeff4x4MB_CAVLC(currMB, PLANE_Y, InvLevelScale4x4, qp_per, cbp, p_Vid->nz_coeff[mb_nr][PLANE_Y]);
+ }
+ else // 8x8 transform
+ {
+ readCompCoeff8x8MB_CAVLC(currMB, PLANE_Y, InvLevelScale8x8, qp_per, cbp, p_Vid->nz_coeff[mb_nr][PLANE_Y]);
+ }
+ }
+ else
+ {
+ memset(&p_Vid->nz_coeff[mb_nr][0][0][0], 0, BLOCK_SIZE * BLOCK_SIZE * sizeof(byte));
+ }
+
+ if ( p_Vid->active_sps->chroma_format_idc==YUV444 && !IS_INDEPENDENT(p_Vid) )
+ {
+ for (uv = 0; uv < 2; ++uv )
+ {
+ /*----------------------16x16DC Luma_Add----------------------*/
+ if (IS_I16MB (currMB)) // read DC coeffs for new intra modes
+ {
+ macroblock_set_dc_pred(p_Vid, currMB->block_x, currMB->block_y);
+
+ if (uv == 0)
+ readCoeff4x4_CAVLC(currMB, CB_INTRA16x16DC, 0, 0, levarr, runarr, &numcoeff);
+ else
+ readCoeff4x4_CAVLC(currMB, CR_INTRA16x16DC, 0, 0, levarr, runarr, &numcoeff);
+
+ coef_ctr=-1;
+ level = 1; // just to get inside the loop
+
+ for(k = 0; k < numcoeff; ++k)
+ {
+ if (levarr[k] != 0) // leave if level == 0
+ {
+ coef_ctr += runarr[k] + 1;
+
+ i0 = pos_scan4x4[coef_ctr][0];
+ j0 = pos_scan4x4[coef_ctr][1];
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[j0][i0]][0][0] = levarr[k];// add new intra DC coeff
+ } //if leavarr[k]
+ } //k loop
+
+ if(currMB->is_lossless == FALSE)
+ {
+ itrans_2(currMB, (ColorPlane) (uv + 1)); // transform new intra DC
+ }
+ } //IS_I16MB
+
+ update_qp(currMB, p_Vid->qp);
+
+ qp_per = p_Vid->qp_per_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ];
+ qp_rem = p_Vid->qp_rem_matrix[ (p_Vid->qp + p_Vid->bitdepth_luma_qp_scale) ];
+
+ //init constants for every chroma qp offset
+ qp_per_uv[uv] = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ];
+ qp_rem_uv[uv] = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + p_Vid->bitdepth_chroma_qp_scale) ];
+
+ InvLevelScale4x4 = intra? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]];
+ InvLevelScale8x8 = intra? currSlice->InvLevelScale8x8_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale8x8_Inter[uv + 1][qp_rem_uv[uv]];
+
+ if (!currMB->luma_transform_size_8x8_flag) // 4x4 transform
+ {
+ readCompCoeff4x4MB_CAVLC(currMB, (ColorPlane) (PLANE_U + uv), InvLevelScale4x4, qp_per_uv[uv], cbp, p_Vid->nz_coeff[mb_nr][PLANE_U + uv]);
+ }
+ else // 8x8 transform
+ {
+ readCompCoeff8x8MB_CAVLC(currMB, (ColorPlane) (PLANE_U + uv), InvLevelScale8x8, qp_per_uv[uv], cbp, p_Vid->nz_coeff[mb_nr][PLANE_U + uv]);
+ }
+ }
+ } //444
+ else if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444))
+ {
+ //========================== CHROMA DC ============================
+ //-----------------------------------------------------------------
+ // chroma DC coeff
+ if(cbp>15)
+ {
+ if (dec_picture->chroma_format_idc == YUV420)
+ {
+ for (ll=0;ll<3;ll+=2)
+ {
+ uv = ll>>1;
+
+ InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]];
+ //===================== CHROMA DC YUV420 ======================
+ memset(&currSlice->cofu[0], 0, 4 *sizeof(int));
+ coef_ctr=-1;
+
+ readCoeff4x4_CAVLC(currMB, CHROMA_DC, 0, 0, levarr, runarr, &numcoeff);
+
+ for(k = 0; k < numcoeff; ++k)
+ {
+ if (levarr[k] != 0)
+ {
+ currMB->cbp_blk[0] |= 0xf0000 << (ll<<1) ;
+ coef_ctr += runarr[k] + 1;
+ currSlice->cofu[coef_ctr]=levarr[k];
+ }
+ }
+
+ if (smb || (currMB->is_lossless == TRUE)) // check to see if MB type is SPred or SIntra4x4
+ {
+ currSlice->cof4[uv + 1][0][0][0] = currSlice->cofu[0];
+ currSlice->cof4[uv + 1][1][0][0] = currSlice->cofu[1];
+ currSlice->cof4[uv + 1][2][0][0] = currSlice->cofu[2];
+ currSlice->cof4[uv + 1][3][0][0] = currSlice->cofu[3];
+ }
+ else
+ {
+ ihadamard2x2(currSlice->cofu, temp);
+
+ currSlice->cof4[uv + 1][0][0][0] = (((temp[0] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ currSlice->cof4[uv + 1][1][0][0] = (((temp[1] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ currSlice->cof4[uv + 1][2][0][0] = (((temp[2] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ currSlice->cof4[uv + 1][3][0][0] = (((temp[3] * InvLevelScale4x4[0][0])<<qp_per_uv[uv])>>5);
+ }
+ }
+ }
+ else if (dec_picture->chroma_format_idc == YUV422)
+ {
+ for (ll=0;ll<3;ll+=2)
+ {
+ int (*InvLevelScale4x4)[4] = NULL;
+ uv = ll>>1;
+ {
+ h264_short_block_t *imgcof = currSlice->cof4[uv + 1];
+ int m3[2][4] = {{0,0,0,0},{0,0,0,0}};
+ int m4[2][4] = {{0,0,0,0},{0,0,0,0}};
+ int qp_per_uv_dc = p_Vid->qp_per_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only
+ int qp_rem_uv_dc = p_Vid->qp_rem_matrix[ (currMB->qpc[uv] + 3 + p_Vid->bitdepth_chroma_qp_scale) ]; //for YUV422 only
+ if (intra)
+ InvLevelScale4x4 = currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv_dc];
+ else
+ InvLevelScale4x4 = currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv_dc];
+
+
+ //===================== CHROMA DC YUV422 ======================
+ readCoeff4x4_CAVLC(currMB, CHROMA_DC, 0, 0, levarr, runarr, &numcoeff);
+ coef_ctr=-1;
+ level=1;
+ for(k = 0; k < numcoeff; ++k)
+ {
+ if (levarr[k] != 0)
+ {
+ currMB->cbp_blk[0] |= ((int64)0xff0000) << (ll<<2);
+ coef_ctr += runarr[k]+1;
+ i0 = SCAN_YUV422[coef_ctr][0];
+ j0 = SCAN_YUV422[coef_ctr][1];
+
+ m3[i0][j0]=levarr[k];
+ }
+ }
+
+ // inverse CHROMA DC YUV422 transform
+ // horizontal
+ if(currMB->is_lossless == FALSE)
+ {
+ m4[0][0] = m3[0][0] + m3[1][0];
+ m4[0][1] = m3[0][1] + m3[1][1];
+ m4[0][2] = m3[0][2] + m3[1][2];
+ m4[0][3] = m3[0][3] + m3[1][3];
+
+ m4[1][0] = m3[0][0] - m3[1][0];
+ m4[1][1] = m3[0][1] - m3[1][1];
+ m4[1][2] = m3[0][2] - m3[1][2];
+ m4[1][3] = m3[0][3] - m3[1][3];
+
+ for (i = 0; i < 2; ++i)
+ {
+ m6[0] = m4[i][0] + m4[i][2];
+ m6[1] = m4[i][0] - m4[i][2];
+ m6[2] = m4[i][1] - m4[i][3];
+ m6[3] = m4[i][1] + m4[i][3];
+
+ imgcof[cof4_pos_to_subblock[0][i]][0][0] = m6[0] + m6[3];
+ imgcof[cof4_pos_to_subblock[1][i]][0][0] = m6[1] + m6[2];
+ imgcof[cof4_pos_to_subblock[2][i]][0][0] = m6[1] - m6[2];
+ imgcof[cof4_pos_to_subblock[3][i]][0][0] = m6[0] - m6[3];
+ }//for (i=0;i<2;++i)
+ }
+ else
+ {
+ currSlice->cof4[uv + 1][0][0][0] = m3[0][0];
+ currSlice->cof4[uv + 1][1][0][0] = m3[1][0];
+ currSlice->cof4[uv + 1][2][0][0] = m3[0][1];
+ currSlice->cof4[uv + 1][3][0][0] = m3[1][1];
+ currSlice->cof4[uv + 1][8][0][0] = m3[0][2];
+ currSlice->cof4[uv + 1][9][0][0] = m3[1][2];
+ currSlice->cof4[uv + 1][10][0][0] = m3[0][3];
+ currSlice->cof4[uv + 1][11][0][0] = m3[1][3];
+ }
+
+ for(j = 0;j < 16; j += BLOCK_SIZE)
+ {
+ for(i=0;i < 8;i+=BLOCK_SIZE)
+ {
+ imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] = rshift_rnd_sf((imgcof[cof4_pos_to_subblock[j>>2][i>>2]][0][0] * InvLevelScale4x4[0][0]) << qp_per_uv_dc, 6);
+ }
+ }
+ }
+ }//for (ll=0;ll<3;ll+=2)
+ }//else if (dec_picture->chroma_format_idc == YUV422)
+ }
+
+ //========================== CHROMA AC ============================
+ //-----------------------------------------------------------------
+ // chroma AC coeff, all zero from start_scan
+ if (cbp<=31)
+ {
+ memset(&p_Vid->nz_coeff [mb_nr ][1][0][0], 0, 2 * BLOCK_SIZE * BLOCK_SIZE * sizeof(byte));
+ }
+ else
+ {
+ if(currMB->is_lossless == FALSE)
+ {
+ for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8)
+ {
+ currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 ));
+ InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]];
+
+ for (b4=0; b4 < 4; ++b4)
+ {
+ i = cofuv_blk_x[yuv][b8][b4];
+ j = cofuv_blk_y[yuv][b8][b4];
+
+ readCoeff4x4_CAVLC(currMB, CHROMA_AC, i + 2*uv, j + 4, levarr, runarr, &numcoeff);
+ coef_ctr = 0;
+
+ for(k = 0; k < numcoeff;++k)
+ {
+ if (levarr[k] != 0)
+ {
+ currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4];
+ coef_ctr += runarr[k] + 1;
+
+ i0=pos_scan4x4[coef_ctr][0];
+ j0=pos_scan4x4[coef_ctr][1];
+
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][j0][i0] = rshift_rnd_sf((levarr[k] * InvLevelScale4x4[j0][i0])<<qp_per_uv[uv], 4);
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ int type;
+ currMB->is_intra_block = IS_INTRA(currMB);
+ type = (currMB->is_intra_block ? SE_CHR_AC_INTRA : SE_CHR_AC_INTER);
+
+ dP = &(currSlice->partArr[partMap[type]]);
+ currSE.mapping = linfo_levrun_inter;
+
+ if(currMB->is_lossless == FALSE)
+ {
+ for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8)
+ {
+ currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 ));
+ InvLevelScale4x4 = intra ? currSlice->InvLevelScale4x4_Intra[uv + 1][qp_rem_uv[uv]] : currSlice->InvLevelScale4x4_Inter[uv + 1][qp_rem_uv[uv]];
+
+ for (b4 = 0; b4 < 4; ++b4)
+ {
+ i = cofuv_blk_x[yuv][b8][b4];
+ j = cofuv_blk_y[yuv][b8][b4];
+
+ currMB->subblock_y = subblk_offset_y[yuv][b8][b4];
+ currMB->subblock_x = subblk_offset_x[yuv][b8][b4];
+
+ pos_scan_4x4 = pos_scan4x4[1];
+
+ for(k = 0; k < 16;k++)
+ {
+ readSyntaxElement_UVLC(&currSE, dP);
+ level = currSE.value1;
+
+ if (level != 0)
+ {
+ currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4];
+ pos_scan_4x4 += (currSE.value2 << 1);
+
+ i0 = *pos_scan_4x4++;
+ j0 = *pos_scan_4x4++;
+
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][j0][i0] = rshift_rnd_sf((level * InvLevelScale4x4[j0][i0])<<qp_per_uv[uv], 4);
+ }
+ else
+ break;
+ } //for(k=0;(k<16)&&(level!=0);++k)
+ }
+ }
+ }
+ else
+ {
+ for (b8=0; b8 < p_Vid->num_blk8x8_uv; ++b8)
+ {
+ currMB->is_v_block = uv = (b8 > ((p_Vid->num_uv_blocks) - 1 ));
+
+ for (b4=0; b4 < 4; ++b4)
+ {
+ i = cofuv_blk_x[yuv][b8][b4];
+ j = cofuv_blk_y[yuv][b8][b4];
+
+ pos_scan_4x4 = pos_scan4x4[1];
+
+ currMB->subblock_y = subblk_offset_y[yuv][b8][b4];
+ currMB->subblock_x = subblk_offset_x[yuv][b8][b4];
+
+ for(k=0;k<16;++k)
+ {
+ readSyntaxElement_UVLC(&currSE, dP);
+ level = currSE.value1;
+
+ if (level != 0)
+ {
+ currMB->cbp_blk[0] |= ((int64)1) << cbp_blk_chroma[b8][b4];
+ pos_scan_4x4 += (currSE.value2 << 1);
+
+ i0 = *pos_scan_4x4++;
+ j0 = *pos_scan_4x4++;
+
+ currSlice->cof4[uv + 1][cof4_pos_to_subblock[j][i]][j0][i0] = level;
+ }
+ else
+ break;
+ }
+ }
+ }
+ } //for (b4=0; b4 < 4; b4++)
+ } //for (b8=0; b8 < p_Vid->num_blk8x8_uv; b8++)
+ } //if (dec_picture->chroma_format_idc != YUV400)
+ }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Run the prediction stage for one color component of a macroblock
+*    that belongs to an I slice.
+*
+*    The residual-DPCM intra prediction mode is reset first; afterwards
+*    the predictor matching the macroblock type (IPCM, intra 16x16,
+*    intra 4x4 or intra 8x8) is called. Any other macroblock type is
+*    left untouched.
+*
+* \return
+*    always 1
+************************************************************************
+*/
+
+static int decode_one_component_i_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // reset the intra prediction mode used for residual DPCM
+  currMB->ipmode_DPCM = NO_INTRA_PMODE;
+
+  // IPCM has to be tested before the intra 16x16 check, exactly as the
+  // dispatch order below does
+  if (currMB->mb_type == IPCM)
+  {
+    mb_pred_ipcm(currMB);
+    return 1;
+  }
+
+  if (IS_I16MB (currMB))
+  {
+    mb_pred_intra16x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  if (currMB->mb_type == I4MB)
+  {
+    mb_pred_intra4x4(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  if (currMB->mb_type == I8MB)
+  {
+    mb_pred_intra8x8(currMB, curr_plane, image, dec_picture);
+  }
+
+  return 1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Run the prediction stage for one color component of a macroblock
+*    that belongs to a P slice.
+*
+*    Intra macroblock types are handled by the intra predictors, PSKIP
+*    by mb_pred_skip, and the 16x16 / 16x8 / 8x16 inter types by their
+*    dedicated predictors. Every remaining type falls through to
+*    mb_pred_p_inter8x8.
+*
+* \return
+*    always 1
+************************************************************************
+*/
+static int decode_one_component_p_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // reset the intra prediction mode used for residual DPCM
+  currMB->ipmode_DPCM = NO_INTRA_PMODE;
+
+  // --- intra macroblocks (IPCM must be tested before intra 16x16) ---
+  if (currMB->mb_type == IPCM)
+  {
+    mb_pred_ipcm(currMB);
+    return 1;
+  }
+  if (IS_I16MB (currMB))
+  {
+    mb_pred_intra16x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == I4MB)
+  {
+    mb_pred_intra4x4(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == I8MB)
+  {
+    mb_pred_intra8x8(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  // --- inter macroblocks ---
+  if (currMB->mb_type == PSKIP)
+  {
+    mb_pred_skip(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P16x16)
+  {
+    mb_pred_p_inter16x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P16x8)
+  {
+    mb_pred_p_inter16x8(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P8x16)
+  {
+    mb_pred_p_inter8x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  // everything else is predicted per 8x8 partition
+  mb_pred_p_inter8x8(currMB, curr_plane, image, dec_picture);
+  return 1;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Run the prediction stage for one color component of a macroblock
+*    that belongs to an SP slice.
+*
+*    The dispatch is the same as for a P slice except that PSKIP
+*    macroblocks are handled by mb_pred_sp_skip.
+*
+* \return
+*    always 1
+************************************************************************
+*/
+static int decode_one_component_sp_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // reset the intra prediction mode used for residual DPCM
+  currMB->ipmode_DPCM = NO_INTRA_PMODE;
+
+  // --- intra macroblocks (IPCM must be tested before intra 16x16) ---
+  if (currMB->mb_type == IPCM)
+  {
+    mb_pred_ipcm(currMB);
+    return 1;
+  }
+  if (IS_I16MB (currMB))
+  {
+    mb_pred_intra16x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == I4MB)
+  {
+    mb_pred_intra4x4(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == I8MB)
+  {
+    mb_pred_intra8x8(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  // --- inter macroblocks ---
+  if (currMB->mb_type == PSKIP)
+  {
+    mb_pred_sp_skip(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P16x16)
+  {
+    mb_pred_p_inter16x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P16x8)
+  {
+    mb_pred_p_inter16x8(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P8x16)
+  {
+    mb_pred_p_inter8x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  // everything else is predicted per 8x8 partition
+  mb_pred_p_inter8x8(currMB, curr_plane, image, dec_picture);
+  return 1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Set chroma_vector_adjustment on the pictures of the reference lists
+*    used by the current macroblock and update p_Vid->max_mb_vmv_r.
+*
+*    - Field macroblock of an MBAFF slice: *list_offset becomes 2 (even
+*      mbAddrX) or 4 (odd mbAddrX) and the two lists at that offset are
+*      processed. An even macroblock referencing a BOTTOM_FIELD picture
+*      gets -2, an odd macroblock referencing a TOP_FIELD picture gets
+*      +2, everything else 0.
+*    - Otherwise LIST_0 and LIST_1 are processed and *list_offset is not
+*      written. Outside MBAFF, a reference whose structure differs from
+*      the current picture structure gets -2 (current is TOP_FIELD) or
+*      +2 (current is BOTTOM_FIELD); all other references get 0.
+*
+* \param currMB
+*    macroblock being decoded
+* \param list_offset
+*    in/out list offset, only written for MBAFF field macroblocks
+************************************************************************
+*/
+static void set_chroma_vector(Macroblock *currMB, int *list_offset)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int list, idx;
+
+  if (currSlice->mb_aff_frame_flag && currMB->mb_field)
+  {
+    // parity of the macroblock address selects the list set
+    const int odd_mb = (currMB->mbAddrX & 0x01);
+    const int offset = odd_mb ? 4 : 2;
+
+    *list_offset = offset;
+
+    for (list = LIST_0 + offset; list <= (LIST_1 + offset); list++)
+    {
+      for (idx = 0; idx < p_Vid->listXsize[list]; idx++)
+      {
+        int adjust = 0;
+
+        if (odd_mb)
+        {
+          if (p_Vid->listX[list][idx]->structure == TOP_FIELD)
+            adjust = 2;
+        }
+        else if (p_Vid->listX[list][idx]->structure == BOTTOM_FIELD)
+        {
+          adjust = -2;
+        }
+
+        p_Vid->listX[list][idx]->chroma_vector_adjustment = adjust;
+      }
+    }
+  }
+  else
+  {
+    // adjustment applied to references whose structure differs from the
+    // current picture; stays 0 for frame pictures and for MBAFF frame MBs
+    int mismatch_adjust = 0;
+
+    if (!currSlice->mb_aff_frame_flag)
+    {
+      if (p_Vid->structure == TOP_FIELD)
+        mismatch_adjust = -2;
+      else if (p_Vid->structure == BOTTOM_FIELD)
+        mismatch_adjust = 2;
+    }
+
+    for (list = LIST_0; list <= (LIST_1); list++)
+    {
+      for (idx = 0; idx < p_Vid->listXsize[list]; idx++)
+      {
+        int adjust = 0;
+
+        if (mismatch_adjust != 0 && p_Vid->structure != p_Vid->listX[list][idx]->structure)
+          adjust = mismatch_adjust;
+
+        p_Vid->listX[list][idx]->chroma_vector_adjustment = adjust;
+      }
+    }
+  }
+
+  // field pictures and MBAFF field macroblocks use half of max_vmv_r
+  if (p_Vid->structure != FRAME || (currSlice->mb_aff_frame_flag && currMB->mb_field))
+    p_Vid->max_mb_vmv_r = p_Vid->max_vmv_r >> 1;
+  else
+    p_Vid->max_mb_vmv_r = p_Vid->max_vmv_r;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Prediction for a B-slice direct/skip macroblock when spatial direct
+*    mode is selected.
+*
+*    Steps performed:
+*    - set_chroma_vector() fixes the chroma adjustments and list offset
+*    - the co-located data set (frame / top / bottom) is selected
+*    - prepare_direct_params() supplies the predicted motion vectors
+*      and reference indices for both lists
+*    - motion vectors, reference indices and ref_pic_id are written into
+*      dec_picture->motion for every 4x4 block of the macroblock and
+*      motion compensation is run (per 8x8, per 4x4 or once for 16x16)
+*    - with cbp == 0 the prediction is copied straight into the picture,
+*      otherwise iTransform() is called
+*
+* \param currMB       macroblock being decoded
+* \param curr_plane   color plane being decoded
+* \param image        destination plane for the cbp == 0 luma copy
+* \param dec_picture  picture that receives motion data and samples
+*
+* \note
+*    NOTE(review): l0_rFrame / l1_rFrame are plain char and are compared
+*    against negative values. On a target where plain char is unsigned
+*    these comparisons behave differently - confirm the build uses a
+*    signed char. The type is kept because prepare_direct_params()
+*    receives the addresses of both variables.
+************************************************************************
+*/
+static void mb_pred_b_dspatial(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  char l0_rFrame = -1, l1_rFrame = -1;
+  PicMotionParams *motion = &dec_picture->motion;
+  MotionVector pmvl0={0,0}, pmvl1={0,0};
+  int k;
+  int block8x8;
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field));
+
+  MotionParams *colocated = &currSlice->p_colocated->frame;
+  int list_offset = 0;
+  int pred_dir = 0;
+
+  // set as soon as motion data was written per block; when it stays FALSE
+  // a single 16x16 motion compensation is done after the branches below
+  Boolean has_zero_partitions = FALSE;
+  h264_ref_t *ref_pic_num_l0, *ref_pic_num_l1;
+
+  set_chroma_vector(currMB, &list_offset);
+
+  // field macroblocks use the co-located data of the matching parity
+  if (currMB->mb_field)
+  {
+    if(currMB->mbAddrX & 0x01)
+    {
+      colocated = &currSlice->p_colocated->bottom;
+    }
+    else
+    {
+      colocated = &currSlice->p_colocated->top;
+    }
+  }
+
+  prepare_direct_params(currMB, dec_picture, pmvl0, pmvl1, &l0_rFrame, &l1_rFrame);
+
+  ref_pic_num_l0 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset];
+  ref_pic_num_l1 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset];
+
+  if (p_Vid->active_sps->direct_8x8_inference_flag)
+  {
+    if (l0_rFrame >=0 && l1_rFrame >=0)
+    {
+      // both lists are used: bi-prediction, motion compensated per 8x8 block
+      PicMotion **motion0 = &motion->motion[LIST_0][currMB->block_y];
+      PicMotion **motion1 = &motion->motion[LIST_1][currMB->block_y];
+      int block_x = currMB->block_x;
+      has_zero_partitions = TRUE;
+      pred_dir = 2;
+      if (p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)
+      { // long term
+        // Every 4x4 block receives the predicted vectors unchanged.
+        // The 8x8 blocks are visited in the order (0,0) (2,0) (0,2) (2,2)
+        // (x,y in 4x4 units) and each one is motion compensated right
+        // after its four 4x4 entries have been written.
+        int blk;
+        for (blk = 0; blk < 4; blk++)
+        {
+          const int bx = (blk & 1) << 1;
+          const int by = (blk >> 1) << 1;
+          int row, col;
+
+          for (row = by; row < by + 2; row++)
+          {
+            for (col = block_x + bx; col < block_x + bx + 2; col++)
+            {
+              memcpy(motion0[row][col].mv, pmvl0, sizeof(MotionVector));
+              motion0[row][col].ref_idx = l0_rFrame;
+              memcpy(motion1[row][col].mv, pmvl1, sizeof(MotionVector));
+              motion1[row][col].ref_idx = l1_rFrame;
+              motion0[row][col].ref_pic_id = ref_pic_num_l0[(short)motion0[row][col].ref_idx];
+              motion1[row][col].ref_pic_id = ref_pic_num_l1[(short)motion1[row][col].ref_idx];
+            }
+          }
+
+          perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, bx, by, list_offset, curr_mb_field);
+        }
+      }
+      else
+      { // not long term
+        const byte **colocated_moving_block = &colocated->moving_block[currMB->block_y_aff];
+        for (block8x8 = 0; block8x8 < 4; block8x8++)
+        {
+          int k_start = (block8x8 << 2);
+          for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++)
+          {
+            int i  = (decode_block_scan[k] & 3);
+            int j  = ((decode_block_scan[k] >> 2) & 3);
+            int i4 = currMB->block_x + i;
+
+            //===== DIRECT PREDICTION =====
+            // reference index 0 with a co-located block that is not
+            // flagged as moving forces a zero vector
+            if (!l0_rFrame && !colocated_moving_block[j][i4])
+            {
+              motion0[j][i4].mv[0] = 0;
+              motion0[j][i4].mv[1] = 0;
+              motion0[j][i4].ref_idx = 0;
+            }
+            else
+            {
+              motion0[j][i4].mv[0] = pmvl0[0];
+              motion0[j][i4].mv[1] = pmvl0[1];
+              motion0[j][i4].ref_idx = l0_rFrame;
+            }
+
+            if (l1_rFrame == 0 && !colocated_moving_block[j][i4])
+            {
+              motion1[j][i4].mv[0] = 0;
+              motion1[j][i4].mv[1] = 0;
+              motion1[j][i4].ref_idx = 0;
+            }
+            else
+            {
+              motion1[j][i4].mv[0] = pmvl1[0];
+              motion1[j][i4].mv[1] = pmvl1[1];
+              motion1[j][i4].ref_idx = l1_rFrame;
+            }
+
+            motion0[j][i4].ref_pic_id = ref_pic_num_l0[(short)motion0[j][i4].ref_idx];
+            motion1[j][i4].ref_pic_id = ref_pic_num_l1[(short)motion1[j][i4].ref_idx];
+          }
+
+          perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, (decode_block_scan[k_start] & 3), ((decode_block_scan[k_start] >> 2) & 3), list_offset, curr_mb_field);
+        }
+      }
+    }
+    else
+    {
+      // at least one list has no valid reference index
+      for (block8x8 = 0; block8x8 < 4; block8x8++)
+      {
+        int k_start = (block8x8 << 2);
+        for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++)
+        {
+          int i  = (decode_block_scan[k] & 3);
+          int j  = ((decode_block_scan[k] >> 2) & 3);
+          int i4 = currMB->block_x + i;
+          int j4 = currMB->block_y + j;
+          int j6 = currMB->block_y_aff + j;
+
+          //===== DIRECT PREDICTION =====
+
+          if (l0_rFrame >=0)
+          {
+            if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+            {
+              has_zero_partitions = TRUE;
+              motion->motion[LIST_0][j4][i4].mv[0] = 0;
+              motion->motion[LIST_0][j4][i4].mv[1] = 0;
+              motion->motion[LIST_0][j4][i4].ref_idx = 0;
+            }
+            else
+            {
+              has_zero_partitions = TRUE;
+              motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0];
+              motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1];
+              motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame;
+            }
+          }
+          else
+          {
+            motion->motion[LIST_0][j4][i4].mv[0] = 0;
+            motion->motion[LIST_0][j4][i4].mv[1] = 0;
+            motion->motion[LIST_0][j4][i4].ref_idx = -1;
+          }
+
+          if (l1_rFrame >=0)
+          {
+            if (l1_rFrame == 0 && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+            {
+              has_zero_partitions = TRUE;
+              motion->motion[LIST_1][j4][i4].mv[0] = 0;
+              motion->motion[LIST_1][j4][i4].mv[1] = 0;
+              motion->motion[LIST_1][j4][i4].ref_idx = 0;
+            }
+            else
+            {
+              has_zero_partitions = TRUE;
+              motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0];
+              motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1];
+              motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame;
+            }
+          }
+          else
+          {
+            motion->motion[LIST_1][j4][i4].mv[0] = 0;
+            motion->motion[LIST_1][j4][i4].mv[1] = 0;
+            motion->motion[LIST_1][j4][i4].ref_idx = -1;
+          }
+
+          // 0: list 0 only, 1: list 1 only, 2: both lists
+          if (l1_rFrame == -1)
+            pred_dir = 0;
+          else if (l0_rFrame == -1)
+            pred_dir = 1;
+          else
+            pred_dir = 2;
+
+          // no valid reference in either list: use index 0 of both lists
+          if (l0_rFrame < 0 && l1_rFrame < 0)
+          {
+            motion->motion[LIST_0][j4][i4].ref_idx = 0;
+            motion->motion[LIST_1][j4][i4].ref_idx = 0;
+            pred_dir = 2;
+          }
+
+          motion->motion[LIST_0][j4][i4].ref_pic_id = ref_pic_num_l0[(short)motion->motion[LIST_0][j4][i4].ref_idx];
+          motion->motion[LIST_1][j4][i4].ref_pic_id = ref_pic_num_l1[(short)motion->motion[LIST_1][j4][i4].ref_idx];
+        }
+
+        if (has_zero_partitions == TRUE)
+        {
+          int i = (decode_block_scan[k_start] & 3);
+          int j = ((decode_block_scan[k_start] >> 2) & 3);
+
+          perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field);
+        }
+      }
+    }
+  }
+  else
+  {
+    // no 8x8 inference: motion compensation is done per 4x4 block
+    for (block8x8 = 0; block8x8 < 4; block8x8++)
+    {
+      int k_start = (block8x8 << 2);
+      int k_end   = k_start + BLOCK_MULTIPLE;
+
+      for (k = k_start; k < k_end; k ++)
+      {
+        int i  = (decode_block_scan[k] & 3);
+        int j  = ((decode_block_scan[k] >> 2) & 3);
+        int i4 = currMB->block_x + i;
+        int j4 = currMB->block_y + j;
+        int j6 = currMB->block_y_aff + j;
+
+        //===== DIRECT PREDICTION =====
+
+        if (l0_rFrame >=0)
+        {
+          if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+          {
+            has_zero_partitions = TRUE;
+            motion->motion[LIST_0][j4][i4].mv[0] = 0;
+            motion->motion[LIST_0][j4][i4].mv[1] = 0;
+            motion->motion[LIST_0][j4][i4].ref_idx = 0;
+          }
+          else
+          {
+            has_zero_partitions = TRUE;
+            motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0];
+            motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1];
+            motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame;
+          }
+        }
+        else
+        {
+          motion->motion[LIST_0][j4][i4].mv[0] = 0;
+          motion->motion[LIST_0][j4][i4].mv[1] = 0;
+          motion->motion[LIST_0][j4][i4].ref_idx = -1;
+        }
+
+        if (l1_rFrame >=0)
+        {
+          if (l1_rFrame == 0 && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+          {
+            has_zero_partitions = TRUE;
+            motion->motion[LIST_1][j4][i4].mv[0] = 0;
+            motion->motion[LIST_1][j4][i4].mv[1] = 0;
+            motion->motion[LIST_1][j4][i4].ref_idx = 0;
+          }
+          else
+          {
+            has_zero_partitions = TRUE;
+            motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0];
+            motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1];
+            motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame;
+          }
+        }
+        else
+        {
+          motion->motion[LIST_1][j4][i4].mv[0] = 0;
+          motion->motion[LIST_1][j4][i4].mv[1] = 0;
+          motion->motion[LIST_1][j4][i4].ref_idx = -1;
+        }
+
+        // no valid reference in either list: use index 0 of both lists
+        if (l0_rFrame < 0 && l1_rFrame < 0)
+        {
+          motion->motion[LIST_0][j4][i4].ref_idx = 0;
+          motion->motion[LIST_1][j4][i4].ref_idx = 0;
+        }
+
+        // 0: list 0 only, 1: list 1 only, 2: both lists
+        if (l1_rFrame == -1)
+        {
+          if (l0_rFrame == -1)
+            pred_dir = 2;
+          else
+            pred_dir = 0;
+        }
+        else if (l0_rFrame == -1)
+        {
+          pred_dir = 1;
+        }
+        else
+          pred_dir = 2;
+
+        motion->motion[LIST_0][j4][i4].ref_pic_id = ref_pic_num_l0[(short)motion->motion[LIST_0][j4][i4].ref_idx];
+        motion->motion[LIST_1][j4][i4].ref_pic_id = ref_pic_num_l1[(short)motion->motion[LIST_1][j4][i4].ref_idx];
+      }
+
+      if (has_zero_partitions == TRUE)
+      {
+        for (k = k_start; k < k_end; k ++)
+        {
+          int i = (decode_block_scan[k] & 3);
+          int j = ((decode_block_scan[k] >> 2) & 3);
+
+          perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, BLOCK_SIZE, BLOCK_SIZE, curr_mb_field);
+        }
+      }
+    }
+  }
+
+  // nothing was compensated per block: do the whole macroblock at once
+  if (has_zero_partitions == FALSE)
+  {
+    perform_mc16x16(currMB, curr_plane, dec_picture, pred_dir, list_offset, curr_mb_field);
+  }
+
+  if (currMB->cbp == 0)
+  {
+    // no coded residual: the prediction is the reconstruction
+    opt_copy_image_data_16x16_stride(image, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[curr_plane]);
+
+    if (dec_picture->chroma_format_idc == YUV420)
+    {
+      copy_image_data_8x8_stride(dec_picture->imgUV[0], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[1]);
+      copy_image_data_8x8_stride(dec_picture->imgUV[1], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[2]);
+    }
+    else if (dec_picture->chroma_format_idc == YUV422)
+    {
+      copy_image_data_stride(dec_picture->imgUV[0], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[1], 8, 16);
+      copy_image_data_stride(dec_picture->imgUV[1], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[2], 8, 16);
+    }
+  }
+  else
+    iTransform(currMB, curr_plane, 0);
+}
+
+
+
+/*!
+************************************************************************
+* \brief
+*    Run the prediction stage for one color component of a macroblock
+*    that belongs to a B slice.
+*
+*    Intra types go to the intra predictors, the 16x16 / 16x8 / 8x16
+*    inter types to their dedicated predictors. BSKIP_DIRECT uses the
+*    temporal direct predictor when direct_spatial_mv_pred_flag of the
+*    slice is 0 and the spatial direct predictor otherwise. Every
+*    remaining type falls through to mb_pred_b_inter8x8.
+*
+* \return
+*    always 1
+************************************************************************
+*/
+
+static int decode_one_component_b_slice(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // reset the intra prediction mode used for residual DPCM
+  currMB->ipmode_DPCM = NO_INTRA_PMODE;
+
+  // --- intra macroblocks (IPCM must be tested before intra 16x16) ---
+  if (currMB->mb_type == IPCM)
+  {
+    mb_pred_ipcm(currMB);
+    return 1;
+  }
+  if (IS_I16MB (currMB))
+  {
+    mb_pred_intra16x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == I4MB)
+  {
+    mb_pred_intra4x4(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == I8MB)
+  {
+    mb_pred_intra8x8(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  // --- inter macroblocks ---
+  if (currMB->mb_type == P16x16)
+  {
+    mb_pred_p_inter16x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P16x8)
+  {
+    mb_pred_p_inter16x8(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+  if (currMB->mb_type == P8x16)
+  {
+    mb_pred_p_inter8x16(currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  // --- direct / skip: temporal or spatial, chosen by the slice flag ---
+  if (currMB->mb_type == BSKIP_DIRECT)
+  {
+    if (currMB->p_Slice->direct_spatial_mv_pred_flag == 0)
+      mb_pred_b_dtemporal (currMB, curr_plane, image, dec_picture);
+    else
+      mb_pred_b_dspatial (currMB, curr_plane, image, dec_picture);
+    return 1;
+  }
+
+  // everything else is predicted per 8x8 partition
+  mb_pred_b_inter8x8 (currMB, curr_plane, image, dec_picture);
+  return 1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Decode one macroblock through the decode_one_component callback of
+*    its slice.
+*
+*    The Y plane is always decoded. The U and V planes are decoded the
+*    same way only when the active SPS signals YUV444 and
+*    IS_INDEPENDENT(p_Vid) is false.
+*
+* \return
+*    always 0 (the results of the component callbacks are not used)
+************************************************************************
+*/
+
+int decode_one_macroblock(Macroblock *currMB, StorablePicture *dec_picture)
+{
+  Slice *slice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  // first plane
+  slice->decode_one_component(currMB, PLANE_Y, dec_picture->imgY, dec_picture);
+
+  // remaining planes are handled here only for non-independent 4:4:4
+  if (p_Vid->active_sps->chroma_format_idc==YUV444)
+  {
+    if (!IS_INDEPENDENT(p_Vid))
+    {
+      slice->decode_one_component(currMB, PLANE_U, dec_picture->imgUV[0], dec_picture);
+      slice->decode_one_component(currMB, PLANE_V, dec_picture->imgUV[1], dec_picture);
+    }
+  }
+
+  return 0;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Switch the decoder to another color plane
+*    (4:4:4 Independent mode).
+*
+*    Stores nplane as colour_plane_id and redirects mb_data,
+*    dec_picture and the co-located data of the current slice to the
+*    per-plane entries with index nplane.
+*
+* \param p_Vid
+*    video parameters to update
+* \param nplane
+*    index of the plane to select; used unchecked as an array index
+************************************************************************
+*/
+void change_plane_JV( VideoParameters *p_Vid, int nplane )
+{
+  Slice *slice = p_Vid->currentSlice;
+
+  p_Vid->colour_plane_id = nplane;
+
+  // redirect the shared pointers to the selected plane's data
+  p_Vid->mb_data     = p_Vid->mb_data_JV[nplane];
+  p_Vid->dec_picture = p_Vid->dec_picture_JV[nplane];
+  slice->p_colocated = slice->Co_located_JV[nplane];
+}
+
+/*!
+************************************************************************
+* \brief
+*    Build one frame picture out of the separately decoded planes
+*    (4:4:4 Independent mode).
+*
+*    dec_picture_JV[0] becomes the current dec_picture. The motion data
+*    of every plane is copied into its JVmotion slot, then the imgY
+*    samples of planes 1 and 2 are copied line by line into imgUV[0]
+*    and imgUV[1]; each of those two plane pictures is released with
+*    free_storable_picture() once it has been copied.
+*
+* \param p_Vid
+*    video parameters holding the per-plane pictures
+************************************************************************
+*/
+void make_frame_picture_JV(VideoParameters *p_Vid)
+{
+  int plane, chroma, row;
+  int row_bytes;
+
+  p_Vid->dec_picture = p_Vid->dec_picture_JV[0];
+
+  // Copy Storable Params
+  for (plane = 0; plane < MAX_PLANE; plane++)
+  {
+    copy_storable_param_JV( p_Vid, &p_Vid->dec_picture->JVmotion[plane], &p_Vid->dec_picture_JV[plane]->motion );
+  }
+
+  // This could be done with pointers and seems not necessary
+  for (chroma = 0; chroma < 2; chroma++)
+  {
+    // every line has the same size, so compute it once per plane
+    row_bytes = sizeof(imgpel) * p_Vid->width;
+
+    for (row = 0; row < p_Vid->height; row++)
+    {
+      memcpy( p_Vid->dec_picture->imgUV[chroma]->img[row], p_Vid->dec_picture_JV[chroma+1]->imgY->img[row], row_bytes );
+    }
+
+    // the plane picture is not needed any more after the copy
+    free_storable_picture(p_Vid, p_Vid->dec_picture_JV[chroma+1]);
+  }
+}
+
+
diff --git a/Src/h264dec/ldecod/src/mb_access.c b/Src/h264dec/ldecod/src/mb_access.c
new file mode 100644
index 00000000..70f3aee2
--- /dev/null
+++ b/Src/h264dec/ldecod/src/mb_access.c
@@ -0,0 +1,3388 @@
+
+/*!
+*************************************************************************************
+* \file mb_access.c
+*
+* \brief
+* Functions for macroblock neighborhoods
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Karsten Sühring <suehring@hhi.de>
+*************************************************************************************
+*/
+
+#include "global.h"
+#include "mbuffer.h"
+#include "mb_access.h"
+
+/*!
+************************************************************************
+* \brief
+*    Tell whether the macroblock at mbAddr may be used as a neighbour
+*    of currMB.
+* \param mbAddr
+*    macroblock address to test
+* \param currMB
+*    current macroblock (supplies p_Vid and the reference slice number)
+* \return
+*    FALSE if mbAddr lies outside [0, PicSizeInMbs - 1], or - unless
+*    p_Vid->DeblockCall is set - if it belongs to a different slice;
+*    TRUE otherwise
+************************************************************************
+*/
+Boolean mb_is_available(int mbAddr, const Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  if (mbAddr < 0)
+    return FALSE;
+  if (mbAddr > ((int)p_Vid->dec_picture->PicSizeInMbs - 1))
+    return FALSE;
+
+  // with DeblockCall set, the slice membership test is skipped
+  if (p_Vid->DeblockCall)
+    return TRUE;
+
+  // comparing slice numbers covers both "same slice" and "already decoded"
+  return (p_Vid->mb_data[mbAddr].slice_nr != currMB->slice_nr) ? FALSE : TRUE;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Compute addresses and availability of the left, upper, upper-right
+*    and upper-left neighbours of the current macroblock, and cache
+*    pointers to the left/upper macroblocks (NULL when unavailable).
+* \param currMB
+*    macroblock whose mb_addr_*, mb_avail_*, mb_left and mb_up fields
+*    are written
+************************************************************************
+*/
+void CheckAvailabilityOfNeighbors(Macroblock *currMB)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int mb_nr = currMB->mbAddrX;
+  int pos_idx;  // PicPos index used for the column tests below
+
+  if (p_Vid->dec_picture->mb_aff_frame_flag)
+  {
+    // MBAFF: work on macroblock pairs; addresses are those of the first MB of a pair
+    pos_idx = mb_nr >> 1;
+    currMB->mb_addr_left        = 2 * (pos_idx - 1);
+    currMB->mb_addr_up          = 2 * (pos_idx - p_Vid->dec_picture->PicWidthInMbs);
+    currMB->mb_addr_upper_right = 2 * (pos_idx - p_Vid->dec_picture->PicWidthInMbs + 1);
+    currMB->mb_addr_upper_left  = 2 * (pos_idx - p_Vid->dec_picture->PicWidthInMbs - 1);
+  }
+  else
+  {
+    pos_idx = mb_nr;
+    currMB->mb_addr_left        = mb_nr - 1;
+    currMB->mb_addr_up          = mb_nr - p_Vid->dec_picture->PicWidthInMbs;
+    currMB->mb_addr_upper_right = mb_nr - p_Vid->dec_picture->PicWidthInMbs + 1;
+    currMB->mb_addr_upper_left  = mb_nr - p_Vid->dec_picture->PicWidthInMbs - 1;
+  }
+
+  // left-side neighbours additionally require the current column to be non-zero,
+  // the upper-right one requires the next PicPos entry to have a non-zero column
+  currMB->mb_avail_left        = (Boolean) (mb_is_available(currMB->mb_addr_left, currMB) && (p_Vid->PicPos[pos_idx][0] != 0));
+  currMB->mb_avail_up          = (Boolean) (mb_is_available(currMB->mb_addr_up, currMB));
+  currMB->mb_avail_upper_right = (Boolean) (mb_is_available(currMB->mb_addr_upper_right, currMB) && (p_Vid->PicPos[pos_idx + 1][0] != 0));
+  currMB->mb_avail_upper_left  = (Boolean) (mb_is_available(currMB->mb_addr_upper_left, currMB) && (p_Vid->PicPos[pos_idx][0] != 0));
+
+  currMB->mb_left = currMB->mb_avail_left ? &(p_Vid->mb_data[currMB->mb_addr_left]) : NULL;
+  currMB->mb_up   = currMB->mb_avail_up   ? &(p_Vid->mb_data[currMB->mb_addr_up])   : NULL;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Look up the macroblock coordinates stored in PicPos for mb_addr.
+* \param PicPos
+*    position table; entry [mb_addr][0] is x, [mb_addr][1] is y
+* \param mb_addr
+*    macroblock address used as table index
+* \param x
+*    receives the x coordinate
+* \param y
+*    receives the y coordinate
+************************************************************************
+*/
+void get_mb_block_pos_normal (const h264_pic_position *PicPos, int mb_addr, short *x, short *y)
+{
+  const int *entry = &PicPos[mb_addr][0];
+
+  *x = (short) entry[0];
+  *y = (short) entry[1];
+}
+
+/*!
+************************************************************************
+* \brief
+*    Macroblock coordinates for mbaff type slices: the table is indexed
+*    by macroblock pair (mb_addr / 2); y is the pair's row doubled plus
+*    the low bit of mb_addr.
+* \param PicPos
+*    position table indexed by pair
+* \param mb_addr
+*    macroblock address
+* \param x
+*    receives the x coordinate
+* \param y
+*    receives the y coordinate
+************************************************************************
+*/
+void get_mb_block_pos_mbaff (const h264_pic_position *PicPos, int mb_addr, short *x, short *y)
+{
+  const int pair      = mb_addr >> 1;
+  const int in_pair   = mb_addr & 0x01;
+
+  *x = (short) PicPos[pair][0];
+  *y = (short) ((PicPos[pair][1] << 1) + in_pair);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Sample coordinates of a macroblock: the macroblock coordinates from
+*    p_Vid->get_mb_block_pos() scaled by the macroblock size.
+* \param p_Vid
+*    decoder state (provides PicPos and the get_mb_block_pos callback)
+* \param mb_addr
+*    macroblock address
+* \param mb_size
+*    macroblock width [0] and height [1] in samples
+* \param x
+*    receives the x sample coordinate
+* \param y
+*    receives the y sample coordinate
+************************************************************************
+*/
+void get_mb_pos (VideoParameters *p_Vid, int mb_addr, const int mb_size[2], short *x, short *y)
+{
+  // first obtain the position in macroblock units ...
+  p_Vid->get_mb_block_pos(p_Vid->PicPos, mb_addr, x, y);
+
+  // ... then convert to samples
+  *x = (short) (*x * mb_size[0]);
+  *y = (short) (*y * mb_size[1]);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    get neighbouring positions for non-aff coding
+* \param currMB
+*    current macroblock; supplies the cached neighbour addresses/flags
+* \param xN
+*    input x position (negative: left column, >= width: right column)
+* \param yN
+*    input y position (negative: row above)
+* \param mb_size
+*    Macroblock size in pixel (according to luma or chroma MB access)
+* \param pix
+*    receives mb_addr/available and, when positions are computed,
+*    x, y, pos_x and pos_y
+* \note
+*    pix->mb_addr is not written on the "not available" paths; if
+*    p_Vid->DeblockCall is non-zero the final test then reads whatever
+*    value pix->mb_addr already held.
+************************************************************************
+*/
+void getNonAffNeighbour(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  if (yN < 0)
+  {
+    // row above the current macroblock: choose by column
+    if (xN < 0)
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_left;
+      pix->available = currMB->mb_avail_upper_left;
+    }
+    else if (xN < maxW)
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+    }
+  }
+  else if (yN < maxH && (xN < 0 || xN < maxW))
+  {
+    // same rows as the current macroblock: left neighbour or the MB itself
+    if (xN < 0)
+    {
+      pix->mb_addr   = currMB->mb_addr_left;
+      pix->available = currMB->mb_avail_left;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mbAddrX;
+      pix->available = TRUE;
+    }
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (xN & (maxW - 1));
+    pix->pos_x = (short) (pix->x + mb_pos[0] * maxW);
+    pix->y     = (short) (yN & (maxH - 1));
+    pix->pos_y = (short) (pix->y + mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * Non-MBAFF neighbour lookup for yN >= 0 that fills only mb_addr,
+ * available and the in-macroblock offsets x / y (no pos_x / pos_y).
+ *
+ * xN < 0 selects the left neighbour, 0 <= xN < mb_size[0] the current
+ * macroblock; anything else (or yN >= mb_size[1]) is reported as not
+ * available, in which case only pix->available is written.
+ */
+void getNonAffNeighbourXP_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix)
+{
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  if (yN < maxH && xN < 0)
+  {
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+  }
+  else if (yN < maxH && xN < maxW)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available)
+  {
+    // the picture position table is not needed here: only the offsets are set
+    pix->x = (short) (xN & (maxW - 1));
+    pix->y = (short) (yN & (maxH - 1));
+  }
+}
+
+/*
+ * Non-MBAFF neighbour lookup for xN >= 0 that fills only mb_addr,
+ * available and the in-macroblock offsets x / y (no pos_x / pos_y).
+ *
+ * yN < 0 selects the upper (xN < mb_size[0]) or upper-right neighbour,
+ * 0 <= yN < mb_size[1] with xN < mb_size[0] the current macroblock;
+ * everything else is not available and only pix->available is written.
+ */
+void getNonAffNeighbourPX_NoPos(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix)
+{
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  if (yN < 0)
+  {
+    if (xN < maxW)
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+    }
+  }
+  else if (xN < maxW && yN < maxH)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available)
+  {
+    // the picture position table is not needed here: only the offsets are set
+    pix->x = (short) (xN & (maxW - 1));
+    pix->y = (short) (yN & (maxH - 1));
+  }
+}
+
+/*
+ * Non-MBAFF neighbour lookup with the macroblock size fixed to 16x16.
+ * Same decision table as getNonAffNeighbour(); the "not available" paths
+ * leave pix->mb_addr untouched.
+ */
+void getNonAffNeighbourLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  if (yN < 0)
+  {
+    // row above the current macroblock
+    if (xN < 0)
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_left;
+      pix->available = currMB->mb_avail_upper_left;
+    }
+    else if (xN < 16)
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+    }
+  }
+  else if (yN < 16 && xN < 16)
+  {
+    // rows of the current macroblock
+    if (xN < 0)
+    {
+      pix->mb_addr   = currMB->mb_addr_left;
+      pix->available = currMB->mb_avail_left;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mbAddrX;
+      pix->available = TRUE;
+    }
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (xN & 15);
+    pix->pos_x = (short) (pix->x + mb_pos[0] * 16);
+    pix->y     = (short) (yN & 15);
+    pix->pos_y = (short) (pix->y + mb_pos[1] * 16);
+  }
+}
+
+/*
+ * 16x16 non-MBAFF neighbour lookup for yN >= 0: left neighbour when
+ * xN < 0, the current macroblock when 0 <= xN < 16, otherwise (or when
+ * yN >= 16) not available.
+ */
+void getNonAffNeighbourXPLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  if (yN < 16 && xN < 0)
+  {
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+  }
+  else if (yN < 16 && xN < 16)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (xN & 15);
+    pix->pos_x = (short) (pix->x + mb_pos[0] * 16);
+    pix->y     = (short) (yN & 15);
+    pix->pos_y = (short) (pix->y + mb_pos[1] * 16);
+  }
+}
+
+
+/*
+ * 16x16 non-MBAFF lookup without bounds checks on yN or the upper xN
+ * limit: xN < 0 means the left neighbour, any other xN the current
+ * macroblock. Only pos_x / pos_y are computed (x / y are not written).
+ * Asserts that p_Vid->DeblockCall is zero.
+ */
+void getNonAffNeighbourXPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int use_left = (xN < 0);
+
+  assert(!p_Vid->DeblockCall);
+
+  pix->mb_addr = use_left ? currMB->mb_addr_left : currMB->mbAddrX;
+  if (use_left)
+    pix->available = currMB->mb_avail_left;
+  else
+    pix->available = TRUE;
+
+  if (pix->available)
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->pos_x = (short) ((xN & 15) + mb_pos[0] * 16);
+    pix->pos_y = (short) (yN + mb_pos[1] * 16);
+  }
+}
+
+/*
+ * 16x16 non-MBAFF lookup for a position assumed by the caller to lie
+ * inside the current macroblock: always reports the current macroblock
+ * as available and computes pos_x / pos_y only.
+ * Asserts that p_Vid->DeblockCall is zero.
+ */
+void getNonAffNeighbourPPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0, xN >= 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int *mb_pos;
+
+  assert(!p_Vid->DeblockCall);
+
+  pix->mb_addr   = currMB->mbAddrX;
+  pix->available = TRUE;
+
+  mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+  pix->pos_x = (short) ((xN & 15) + mb_pos[0] * 16);
+  pix->pos_y = (short) (yN + mb_pos[1] * 16);
+}
+
+
+/*
+ * Like getNonAffNeighbourXPLumaNB() but fills the in-macroblock offsets
+ * x / y instead of the picture positions. yN is stored unmasked.
+ * Asserts that DeblockCall is zero.
+ */
+void getNonAffNeighbourXPLumaNB_NoPos(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // yN >= 0
+{
+  assert(!currMB->p_Vid->DeblockCall);
+
+  if (xN >= 0)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+  }
+
+  if (!pix->available)
+    return;
+
+  pix->x = (short) (xN & 15);
+  pix->y = (short) (yN);
+}
+
+/*
+ * 16x16 non-MBAFF lookup of the left neighbour column (xN = -1) without
+ * a range check on yN. Writes pos_x, y and pos_y; pix->x is not written.
+ */
+void getNonAffNeighbourNPLumaNB(const Macroblock *currMB, int yN, PixelPos *pix) // xN = -1, yN >= 0 && yN < 16
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    // (-1 & 15) is the last column of the left macroblock
+    pix->pos_x = (short) ((-1 & 15) + mb_pos[0] * 16);
+    pix->y     = (short) (yN);
+    pix->pos_y = (short) (yN + mb_pos[1] * 16);
+  }
+}
+
+
+/*
+ * 16x16 non-MBAFF neighbour lookup for xN >= 0: upper or upper-right
+ * neighbour when yN < 0, the current macroblock when both coordinates
+ * are below 16, otherwise not available (pix->mb_addr untouched).
+ */
+void getNonAffNeighbourPXLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // xN is >= 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  if (yN < 0)
+  {
+    if (xN < 16)
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+    }
+  }
+  else if (xN < 16 && yN < 16)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (xN & 15);
+    pix->pos_x = (short) (pix->x + mb_pos[0] * 16);
+    pix->y     = (short) (yN & 15);
+    pix->pos_y = (short) (pix->y + mb_pos[1] * 16);
+  }
+}
+
+/*
+ * 16x16 non-MBAFF lookup without bounds checks on xN or the upper yN
+ * limit: yN < 0 means the upper neighbour, any other yN the current
+ * macroblock. Only pos_x / pos_y are computed; xN is used unmasked.
+ * Asserts that p_Vid->DeblockCall is zero.
+ */
+void getNonAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix) // xN is >= 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  assert(!p_Vid->DeblockCall);
+
+  if (yN >= 0)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mb_addr_up;
+    pix->available = currMB->mb_avail_up;
+  }
+
+  if (pix->available)
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->pos_x = (short) (xN + mb_pos[0] * 16);
+    pix->pos_y = (short) ((yN & 15) + mb_pos[1] * 16);
+  }
+}
+
+/*
+ * Selects the upper neighbour (yN < 0) or the current macroblock and,
+ * when available, stores only the row offset pix->y = yN & 15.
+ * Asserts that DeblockCall is zero.
+ */
+void getNonAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix) // xN is >= 0
+{
+  assert(!currMB->p_Vid->DeblockCall);
+
+  if (yN >= 0)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mb_addr_up;
+    pix->available = currMB->mb_avail_up;
+  }
+
+  if (!pix->available)
+    return;
+
+  pix->y = (short) (yN & 15);
+}
+
+/*
+ * 16x16 non-MBAFF lookup of the fixed position (-1, 0): last column,
+ * first row of the left neighbour. Asserts that DeblockCall is zero.
+ */
+void getNonAffNeighbourN0Luma(const Macroblock *currMB, PixelPos *pix) // xN = -1, yN = 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  assert(p_Vid->DeblockCall == 0);
+
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+  if (!pix->available)
+    return;
+
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (-1 & 15);
+    pix->pos_x = (short) (pix->x + mb_pos[0] * 16);
+    pix->y     = 0;
+    pix->pos_y = (short) (mb_pos[1] * 16);
+  }
+}
+
+
+/*
+ * Non-MBAFF lookup of the fixed position (-1, 0) for an arbitrary
+ * macroblock size: last column, first row of the left neighbour.
+ * Asserts mb_size[1] != 0 and DeblockCall == 0.
+ */
+void getNonAffNeighbourN0(const Macroblock *currMB, const int mb_size[2], PixelPos *pix) // xN = -1, yN = 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  assert(maxH != 0);
+  assert(p_Vid->DeblockCall == 0);
+
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+  if (!pix->available)
+    return;
+
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (-1 & (maxW - 1));
+    pix->pos_x = (short) (pix->x + mb_pos[0] * maxW);
+    pix->y     = 0;
+    pix->pos_y = (short) (mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * Non-MBAFF lookup of the fixed position (0, -1) for an arbitrary
+ * macroblock size: first column, last row of the upper neighbour.
+ * Asserts mb_size[0] != 0.
+ */
+void getNonAffNeighbour0N(const Macroblock *currMB, const int mb_size[2], PixelPos *pix) // xN = 0, yN = -1
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  assert(maxW != 0);
+
+  pix->mb_addr   = currMB->mb_addr_up;
+  pix->available = currMB->mb_avail_up;
+  if (!pix->available)
+    return;
+
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = 0;
+    pix->pos_x = (short) (mb_pos[0] * maxW);
+    pix->y     = (short) (-1 & (maxH - 1));
+    pix->pos_y = (short) (pix->y + mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * 16x16 non-MBAFF lookup of the fixed position (0, -1): first column,
+ * last row of the upper neighbour.
+ */
+void getNonAffNeighbour0NLuma(const Macroblock *currMB, PixelPos *pix) // xN = 0, yN = -1
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  pix->mb_addr   = currMB->mb_addr_up;
+  pix->available = currMB->mb_avail_up;
+  if (!pix->available)
+    return;
+
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = 0;
+    pix->pos_x = (short) (mb_pos[0] * 16);
+    pix->y     = (short) (-1 & (16 - 1));
+    pix->pos_y = (short) (pix->y + mb_pos[1] * 16);
+  }
+}
+
+
+/*
+ * Non-MBAFF lookup for the column left of the current macroblock
+ * (xN = -1): upper-left neighbour when yN < 0, left neighbour when
+ * 0 <= yN < mb_size[1], otherwise not available (pix->mb_addr untouched).
+ */
+void getNonAffNeighbourNX(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN = -1, yN full range
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  if (yN < 0)
+  {
+    pix->mb_addr   = currMB->mb_addr_upper_left;
+    pix->available = currMB->mb_avail_upper_left;
+  }
+  else if (yN >= maxH)
+  {
+    pix->available = FALSE;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (-1 & (maxW - 1));
+    pix->pos_x = (short) (pix->x + mb_pos[0] * maxW);
+    pix->y     = (short) (yN & (maxH - 1));
+    pix->pos_y = (short) (pix->y + mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * 16x16 variant of getNonAffNeighbourNX(): column left of the current
+ * macroblock (xN = -1), any yN.
+ */
+void getNonAffNeighbourNXLuma(const Macroblock *currMB, int yN, PixelPos *pix) // xN = -1, yN full range
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  if (yN < 0)
+  {
+    pix->mb_addr   = currMB->mb_addr_upper_left;
+    pix->available = currMB->mb_avail_upper_left;
+  }
+  else if (yN >= 16)
+  {
+    pix->available = FALSE;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (-1 & (16 - 1));
+    pix->pos_x = (short) (pix->x + mb_pos[0] * 16);
+    pix->y     = (short) (yN & (16 - 1));
+    pix->pos_y = (short) (pix->y + mb_pos[1] * 16);
+  }
+}
+
+/*
+ * Non-MBAFF lookup of the left neighbour column for yN >= 0.
+ * Reports "not available" when yN >= mb_size[1]; otherwise copies the
+ * left neighbour's address/availability and, if available, computes
+ * pos_x / pos_y only (x / y are not written).
+ */
+void getNonAffNeighbourNP(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN < 0, yN >= 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  if (yN >= maxH)
+  {
+    pix->available = FALSE;
+    return;
+  }
+
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+  if (pix->available)
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->pos_x = (short) ((-1 & (maxW - 1)) + mb_pos[0] * maxW);
+    pix->pos_y = (short) (yN + mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * Same as getNonAffNeighbourNP() but without the yN range check:
+ * always refers to the left neighbour and, if it is available,
+ * computes pos_x / pos_y only.
+ */
+void getNonAffNeighbourNPChromaNB(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN < 0, yN >= 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+  if (!pix->available)
+    return;
+
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->pos_x = (short) ((-1 & (maxW - 1)) + mb_pos[0] * maxW);
+    pix->pos_y = (short) (yN + mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * Non-MBAFF lookup for column 0 (xN = 0) and any yN.
+ * With a positive mb_size[0]: upper neighbour for yN < 0, the current
+ * macroblock for 0 <= yN < mb_size[1], otherwise not available.
+ * With mb_size[0] <= 0 the upper-right neighbour is used for yN < 0.
+ */
+void getNonAffNeighbour0X(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN is guaranteed to be zero
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  if (yN < 0)
+  {
+    if (0 < maxW)
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+    }
+  }
+  else if (0 < maxW && yN < maxH)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = 0;
+    pix->pos_x = (short) (mb_pos[0] * maxW);
+    pix->y     = (short) (yN & (maxH - 1));
+    pix->pos_y = (short) (pix->y + mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * 16x16 non-MBAFF lookup for column 0 (xN = 0) and any yN: upper
+ * neighbour for yN < 0, the current macroblock for 0 <= yN < 16,
+ * otherwise not available (pix->mb_addr untouched).
+ */
+void getNonAffNeighbour0XLuma(const Macroblock *currMB, int yN, PixelPos *pix) // xN is guaranteed to be zero
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = 16;
+  const int maxH = 16;
+
+  if (yN < 0)
+  {
+    pix->mb_addr   = currMB->mb_addr_up;
+    pix->available = currMB->mb_avail_up;
+  }
+  else if (yN >= 16)
+  {
+    pix->available = FALSE;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = 0;
+    pix->pos_x = (short) (mb_pos[0] * maxW);
+    pix->y     = (short) (yN & (maxH - 1));
+    pix->pos_y = (short) (pix->y + mb_pos[1] * maxH);
+  }
+}
+
+/*
+ * Non-MBAFF lookup for row 0 (yN = 0) and any xN.
+ * With a positive mb_size[1]: left neighbour for xN < 0, the current
+ * macroblock for 0 <= xN < mb_size[0]; everything else is not available
+ * and pix->mb_addr is left untouched.
+ *
+ * The former "&& p_Vid" term of the final test was dropped: p_Vid is
+ * already dereferenced earlier in the same expression, so it could never
+ * change the outcome.
+ */
+void getNonAffNeighbourX0(const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix) // xN is full range, yN is 0
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+
+  if (0 < maxH && xN < 0)
+  {
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+  }
+  else if (0 < maxH && xN >= 0 && xN < maxW)
+  {
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->available = FALSE;
+  }
+
+  if (pix->available || (p_Vid->DeblockCall && pix->mb_addr))
+  {
+    const int *mb_pos = &p_Vid->PicPos[ pix->mb_addr ][0];
+    pix->x     = (short) (xN & (maxW - 1));
+    pix->pos_x = (short) (pix->x + mb_pos[0] * maxW);
+    pix->y     = 0;
+    pix->pos_y = (short) (mb_pos[1] * maxH);
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+* get neighboring positions for aff coding
+* \param currMB
+* current macroblock
+* \param xN
+* input x position
+* \param yN
+* input y position
+* \param mb_size
+* Macroblock size in pixel (according to luma or chroma MB access)
+* \param pix
+* returns position informations
+* \note
+* pix->available is cleared first. The function then returns at once,
+* writing nothing else, when yN >= mb_size[1] or when xN >= mb_size[0]
+* with yN >= 0.
+* \note
+* An even currMB->mbAddrX is treated as the top macroblock of a pair,
+* an odd one as the bottom macroblock. yM is the row inside the selected
+* neighbour; on several paths it is halved or doubled depending on the
+* frame/field mode of the current and the neighbouring macroblock.
+* \note
+* When p_Vid->DeblockCall is non-zero the position fields are computed
+* even if pix->available is FALSE. On paths that never assign
+* pix->mb_addr (e.g. frame-mode bottom macroblock with xN >= mb_size[0]
+* and yN < 0) get_mb_pos() then receives whatever pix->mb_addr held on
+* entry - NOTE(review): confirm callers never depend on that case.
+* \note
+* The final masking with (size - 1) equals a modulo only for
+* power-of-two sizes - presumably always true for mb_size; confirm.
+************************************************************************
+*/
+void getAffNeighbour(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int maxW, maxH;
+ int yM = -1; // row inside the neighbour MB; stays -1 on paths that do not set it
+
+ maxW = mb_size[0];
+ maxH = mb_size[1];
+
+ // initialize to "not available"
+ pix->available = FALSE;
+
+ if(yN > (maxH - 1)) // below the current macroblock: never available
+ {
+ return;
+ }
+ if (xN > (maxW - 1) && yN >= 0 && yN < maxH) // right of the current macroblock, same rows
+ {
+ return;
+ }
+
+ if (xN < 0)
+ {
+ if (yN < 0)
+ {
+ if(!currMB->mb_field)
+ {
+ // frame
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ // top
+ pix->mb_addr = currMB->mb_addr_upper_left + 1; // second MB of the upper-left pair
+ pix->available = currMB->mb_avail_upper_left;
+ yM = yN;
+ }
+ else
+ {
+ // bottom
+ pix->mb_addr = currMB->mb_addr_left;
+ pix->available = currMB->mb_avail_left;
+ if (currMB->mb_avail_left)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
+ {
+ yM = yN;
+ }
+ else
+ {
+ (pix->mb_addr)++;
+ yM = (yN + maxH) >> 1;
+ }
+ }
+ }
+ }
+ else
+ {
+ // field
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ // top
+ pix->mb_addr = currMB->mb_addr_upper_left;
+ pix->available = currMB->mb_avail_upper_left;
+ if (currMB->mb_avail_upper_left)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_upper_left].mb_field)
+ {
+ (pix->mb_addr)++;
+ yM = 2 * yN;
+ }
+ else
+ {
+ yM = yN;
+ }
+ }
+ }
+ else
+ {
+ // bottom
+ pix->mb_addr = currMB->mb_addr_upper_left+1;
+ pix->available = currMB->mb_avail_upper_left;
+ yM = yN;
+ }
+ }
+ }
+ else
+ { // xN < 0 && yN >= 0
+ if (yN <maxH)
+ {
+ if (!currMB->mb_field)
+ {
+ // frame
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ // top
+ pix->mb_addr = currMB->mb_addr_left;
+ pix->available = currMB->mb_avail_left;
+ if (currMB->mb_avail_left)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
+ {
+ yM = yN;
+ }
+ else
+ {
+ (pix->mb_addr)+= ((yN & 0x01) != 0); // odd rows come from the second MB of the left pair
+ yM = yN >> 1;
+ }
+ }
+ }
+ else
+ {
+ // bottom
+ pix->mb_addr = currMB->mb_addr_left;
+ pix->available = currMB->mb_avail_left;
+ if (currMB->mb_avail_left)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
+ {
+ (pix->mb_addr)++;
+ yM = yN;
+ }
+ else
+ {
+ (pix->mb_addr)+= ((yN & 0x01) != 0);
+ yM = (yN + maxH) >> 1;
+ }
+ }
+ }
+ }
+ else
+ {
+ // field
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ // top
+ pix->mb_addr = currMB->mb_addr_left;
+ pix->available = currMB->mb_avail_left;
+ if (currMB->mb_avail_left)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
+ {
+ if (yN < (maxH >> 1))
+ {
+ yM = yN << 1;
+ }
+ else
+ {
+ (pix->mb_addr)++;
+ yM = (yN << 1 ) - maxH;
+ }
+ }
+ else
+ {
+ yM = yN;
+ }
+ }
+ }
+ else
+ {
+ // bottom
+ pix->mb_addr = currMB->mb_addr_left;
+ pix->available = currMB->mb_avail_left;
+ if (currMB->mb_avail_left)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
+ {
+ if (yN < (maxH >> 1))
+ {
+ yM = (yN << 1) + 1;
+ }
+ else
+ {
+ (pix->mb_addr)++;
+ yM = (yN << 1 ) + 1 - maxH;
+ }
+ }
+ else
+ {
+ (pix->mb_addr)++;
+ yM = yN;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ { // xN >= 0
+ if (xN >= 0 && xN < maxW)
+ {
+ if (yN<0)
+ {
+ if (!currMB->mb_field)
+ {
+ //frame
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ //top
+ pix->mb_addr = currMB->mb_addr_up;
+ // for the deblocker if the current MB is a frame and the one above is a field
+ // then the neighbor is the top MB of the pair
+ if (currMB->mb_avail_up)
+ {
+ if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
+ pix->mb_addr += 1;
+ }
+
+ pix->available = currMB->mb_avail_up;
+ yM = yN;
+ }
+ else
+ {
+ // bottom
+ pix->mb_addr = currMB->mbAddrX - 1; // the other MB of the same pair
+ pix->available = TRUE;
+ yM = yN;
+ }
+ }
+ else
+ {
+ // field
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ // top
+ pix->mb_addr = currMB->mb_addr_up;
+ pix->available = currMB->mb_avail_up;
+ if (currMB->mb_avail_up)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+ {
+ (pix->mb_addr)++;
+ yM = 2* yN;
+ }
+ else
+ {
+ yM = yN;
+ }
+ }
+ }
+ else
+ {
+ // bottom
+ pix->mb_addr = currMB->mb_addr_up + 1;
+ pix->available = currMB->mb_avail_up;
+ yM = yN;
+ }
+ }
+ }
+ else
+ {
+ // yN >=0
+ // for the deblocker if this is the extra edge then do this special stuff
+ if (yN == 0 && p_Vid->DeblockCall == 2)
+ {
+ pix->mb_addr = currMB->mb_addr_up + 1;
+ pix->available = TRUE;
+ yM = yN - 1;
+ }
+
+ else if ((yN <maxH))
+ {
+ pix->mb_addr = currMB->mbAddrX;
+ pix->available = TRUE;
+ yM = yN;
+ }
+ }
+ }
+ else
+ { // xN >= maxW
+ if(yN < 0)
+ {
+ if (!currMB->mb_field)
+ {
+ // frame
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ // top
+ pix->mb_addr = currMB->mb_addr_upper_right + 1;
+ pix->available = currMB->mb_avail_upper_right;
+ yM = yN;
+ }
+ else
+ {
+ // bottom
+ pix->available = FALSE; // pix->mb_addr is not assigned on this path
+ }
+ }
+ else
+ {
+ // field
+ if ((currMB->mbAddrX & 0x01) == 0)
+ {
+ // top
+ pix->mb_addr = currMB->mb_addr_upper_right;
+ pix->available = currMB->mb_avail_upper_right;
+ if (currMB->mb_avail_upper_right)
+ {
+ if(!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field)
+ {
+ (pix->mb_addr)++;
+ yM = 2* yN;
+ }
+ else
+ {
+ yM = yN;
+ }
+ }
+ }
+ else
+ {
+ // bottom
+ pix->mb_addr = currMB->mb_addr_upper_right + 1;
+ pix->available = currMB->mb_avail_upper_right;
+ yM = yN;
+ }
+ }
+ }
+ }
+ }
+ if (pix->available || p_Vid->DeblockCall) // positions are also filled in whenever DeblockCall is non-zero
+ {
+ pix->x = (short) (xN & (maxW - 1));
+ pix->y = (short) (yM & (maxH - 1));
+ get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+ pix->pos_x = pix->pos_x + pix->x;
+ pix->pos_y = pix->pos_y + pix->y;
+ }
+}
+
+/*!
+************************************************************************
+* \brief
+*    get neighboring positions for aff coding, specialised for the
+*    column left of the current macroblock (xN fixed to -1)
+*
+*    Produces the same result as getAffNeighbour(currMB, -1, yN, mb_size,
+*    pix). The branches for xN >= 0 can never execute with xN == -1 and
+*    were removed; the four "yN >= 0" cases, which all start from the
+*    left macroblock pair, were merged.
+* \param currMB
+*    current macroblock
+* \param yN
+*    input y position
+* \param mb_size
+*    Macroblock size in pixel (entries are assumed to be non-negative)
+* \param pix
+*    returns position informations; only pix->available (FALSE) is
+*    written when yN >= mb_size[1]
+* \note
+*    When p_Vid->DeblockCall is non-zero the position fields are computed
+*    even if the neighbour is not available (yM is then -1).
+************************************************************************
+*/
+void getAffNeighbourNX(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+  // even address: top macroblock of the pair, odd address: bottom macroblock
+  const int curr_is_top = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (yN > (maxH - 1))
+  {
+    return;
+  }
+
+  if (yN < 0)
+  {
+    if (!currMB->mb_field && !curr_is_top)
+    {
+      // frame, bottom: the neighbour lies in the left pair
+      pix->mb_addr   = currMB->mb_addr_left;
+      pix->available = currMB->mb_avail_left;
+      if (currMB->mb_avail_left)
+      {
+        if (!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
+        {
+          yM = yN;
+        }
+        else
+        {
+          (pix->mb_addr)++;
+          yM = (yN + maxH) >> 1;
+        }
+      }
+    }
+    else if (currMB->mb_field && curr_is_top)
+    {
+      // field, top: the neighbour lies in the upper-left pair
+      pix->mb_addr   = currMB->mb_addr_upper_left;
+      pix->available = currMB->mb_avail_upper_left;
+      if (currMB->mb_avail_upper_left)
+      {
+        if (!p_Vid->mb_data[currMB->mb_addr_upper_left].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = 2 * yN;
+        }
+        else
+        {
+          yM = yN;
+        }
+      }
+    }
+    else
+    {
+      // frame/top or field/bottom: second macroblock of the upper-left pair
+      pix->mb_addr   = currMB->mb_addr_upper_left + 1;
+      pix->available = currMB->mb_avail_upper_left;
+      yM = yN;
+    }
+  }
+  else
+  {
+    // 0 <= yN < maxH: always the left pair
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+    if (currMB->mb_avail_left)
+    {
+      const int left_is_field = (p_Vid->mb_data[currMB->mb_addr_left].mb_field != 0);
+
+      if (!currMB->mb_field)
+      {
+        if (!left_is_field)
+        {
+          // frame next to frame: bottom looks at the second MB of the left pair
+          if (!curr_is_top)
+          {
+            (pix->mb_addr)++;
+          }
+          yM = yN;
+        }
+        else
+        {
+          // frame next to field: odd rows come from the second MB of the left pair
+          (pix->mb_addr) += ((yN & 0x01) != 0);
+          yM = curr_is_top ? (yN >> 1) : ((yN + maxH) >> 1);
+        }
+      }
+      else if (!left_is_field)
+      {
+        // field next to frame: rows are spread over both MBs of the left pair
+        const int parity = curr_is_top ? 0 : 1;
+        if (yN < (maxH >> 1))
+        {
+          yM = (yN << 1) + parity;
+        }
+        else
+        {
+          (pix->mb_addr)++;
+          yM = (yN << 1) + parity - maxH;
+        }
+      }
+      else
+      {
+        // field next to field: same parity macroblock of the left pair
+        if (!curr_is_top)
+        {
+          (pix->mb_addr)++;
+        }
+        yM = yN;
+      }
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (-1 & (maxW - 1));
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = pix->pos_x + pix->x;
+    pix->pos_y = pix->pos_y + pix->y;
+  }
+}
+
+/*!
+ * \brief
+ *    Luma convenience wrapper around getAffNeighbourNX(): forwards the call
+ *    with a fixed 16x16 macroblock size.
+ * \param currMB
+ *    current macroblock
+ * \param yN
+ *    vertical input position, passed through unchanged
+ * \param pix
+ *    receives the position information filled in by getAffNeighbourNX()
+ */
+void getAffNeighbourNXLuma(const Macroblock *currMB, int yN, PixelPos *pix)
+{
+  const int luma_size[2] = { 16, 16 };
+
+  getAffNeighbourNX(currMB, yN, luma_size, pix);
+}
+/*!
+ * \brief
+ *    Neighbour lookup for the fixed position xN = -1, yN = 0 (left neighbour
+ *    of the first row) in a picture where each macroblock may be frame or
+ *    field coded.
+ *
+ *    Because xN and yN are constants here, only the "left neighbour,
+ *    yN >= 0" part of the general case table can ever execute; the
+ *    upper-left / upper / upper-right / inside cases were unreachable and
+ *    have been removed.
+ * \param currMB
+ *    current macroblock
+ * \param mb_size
+ *    macroblock size in pixel: mb_size[0] is the width, mb_size[1] the height
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x, y, pos_x and pos_y
+ */
+void getAffNeighbourN0(const Macroblock *currMB, const int mb_size[2], PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+  const int xN = -1;
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  // These are the general early-out tests evaluated for (xN, yN) = (-1, 0):
+  //   yN > maxH - 1             <=>  maxH < 1
+  //   xN > maxW - 1 (yN inside) <=>  maxW < 0
+  if (maxH < 1 || maxW < 0)
+  {
+    return;
+  }
+
+  // every reachable case looks at the left macroblock pair
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+
+  if (currMB->mb_avail_left)
+  {
+    const int left_is_frame = !p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+    const int cur_is_bottom = ((currMB->mbAddrX & 0x01) != 0);
+
+    if (!currMB->mb_field)
+    {
+      // current MB is frame coded
+      if (!cur_is_bottom)
+      {
+        // top: row 0 of the first MB of the left pair, whatever its coding
+        yM = 0;
+      }
+      else if (left_is_frame)
+      {
+        // bottom, left pair is frame: row 0 of the second MB of the pair
+        (pix->mb_addr)++;
+        yM = 0;
+      }
+      else
+      {
+        // bottom, left pair is field: first MB of the pair, row maxH / 2
+        yM = maxH >> 1;
+      }
+    }
+    else if (left_is_frame)
+    {
+      // current MB is field coded, left pair is frame coded:
+      // the row is 0 for the top MB and 1 for the bottom MB
+      if ((maxH >> 1) > 0)
+      {
+        yM = cur_is_bottom;
+      }
+      else
+      {
+        // only reachable for maxH == 1; kept so results match the
+        // general formula (yN << 1) [+ 1] - maxH
+        (pix->mb_addr)++;
+        yM = cur_is_bottom - maxH;
+      }
+    }
+    else
+    {
+      // current MB and left pair are both field coded: same parity MB, row 0
+      if (cur_is_bottom)
+      {
+        (pix->mb_addr)++;
+      }
+      yM = 0;
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (xN & (maxW - 1));
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = pix->pos_x + pix->x;
+    pix->pos_y = pix->pos_y + pix->y;
+  }
+}
+
+
+/*!
+ * \brief
+ *    General luma (16x16) neighbour lookup for pictures in which each
+ *    macroblock may be frame or field coded.
+ *
+ *    (xN, yN) is a position relative to the current macroblock. Depending on
+ *    where it falls (left, upper-left, above, upper-right or inside) and on
+ *    the frame/field coding of the current MB and of the neighbouring pair,
+ *    the function selects the macroblock address and the row inside it.
+ * \param currMB
+ *    current macroblock
+ * \param xN
+ *    input x position
+ * \param yN
+ *    input y position
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x, y, pos_x and pos_y
+ */
+void getAffNeighbourLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = 16, maxH = 16;
+  const int cur_frame = !currMB->mb_field;
+  const int cur_top   = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  // nothing below the macroblock is ever available
+  if (yN > (maxH - 1))
+  {
+    return;
+  }
+  // nothing to the right within the macroblock rows either
+  if (xN > (maxW - 1) && yN >= 0)
+  {
+    return;
+  }
+
+  if (xN < 0 && yN < 0)
+  {
+    // ---- upper-left corner ----
+    if (cur_frame && !cur_top)
+    {
+      // bottom frame MB: the corner lies in the left pair
+      pix->mb_addr   = currMB->mb_addr_left;
+      pix->available = currMB->mb_avail_left;
+      if (currMB->mb_avail_left)
+      {
+        if (!p_Vid->mb_data[currMB->mb_addr_left].mb_field)
+        {
+          yM = yN;
+        }
+        else
+        {
+          (pix->mb_addr)++;
+          yM = (yN + maxH) >> 1;
+        }
+      }
+    }
+    else if (!cur_frame && cur_top)
+    {
+      // top field MB
+      pix->mb_addr   = currMB->mb_addr_upper_left;
+      pix->available = currMB->mb_avail_upper_left;
+      if (currMB->mb_avail_upper_left)
+      {
+        if (!p_Vid->mb_data[currMB->mb_addr_upper_left].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = 2 * yN;
+        }
+        else
+        {
+          yM = yN;
+        }
+      }
+    }
+    else
+    {
+      // top frame MB or bottom field MB: second MB of the upper-left pair
+      pix->mb_addr   = currMB->mb_addr_upper_left + 1;
+      pix->available = currMB->mb_avail_upper_left;
+      yM = yN;
+    }
+  }
+  else if (xN < 0)
+  {
+    // ---- left column, 0 <= yN < maxH ----
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+    if (currMB->mb_avail_left)
+    {
+      const int left_frame = !p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+
+      if (left_frame == cur_frame)
+      {
+        // same coding type: same row, bottom MB maps to the second MB
+        if (!cur_top)
+        {
+          (pix->mb_addr)++;
+        }
+        yM = yN;
+      }
+      else if (cur_frame)
+      {
+        // frame MB next to a field pair: odd rows live in the second MB
+        (pix->mb_addr) += ((yN & 0x01) != 0);
+        yM = (cur_top ? yN : (yN + maxH)) >> 1;
+      }
+      else
+      {
+        // field MB next to a frame pair: rows are interleaved over the pair
+        yM = (yN << 1) + (cur_top ? 0 : 1);
+        if (yN >= (maxH >> 1))
+        {
+          (pix->mb_addr)++;
+          yM -= maxH;
+        }
+      }
+    }
+  }
+  else if (xN < maxW)
+  {
+    if (yN < 0)
+    {
+      // ---- directly above ----
+      if (cur_frame && !cur_top)
+      {
+        // bottom frame MB: the top MB of the same pair
+        pix->mb_addr   = currMB->mbAddrX - 1;
+        pix->available = TRUE;
+        yM = yN;
+      }
+      else
+      {
+        pix->mb_addr   = currMB->mb_addr_up;
+        pix->available = currMB->mb_avail_up;
+        if (cur_frame)
+        {
+          // top frame MB: for the deblocker (DeblockCall == 1) with a field
+          // pair above, the neighbour stays the top MB of that pair
+          if (currMB->mb_avail_up)
+          {
+            if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
+              pix->mb_addr += 1;
+          }
+          yM = yN;
+        }
+        else if (!cur_top)
+        {
+          // bottom field MB
+          pix->mb_addr += 1;
+          yM = yN;
+        }
+        else if (currMB->mb_avail_up)
+        {
+          // top field MB
+          if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+          {
+            (pix->mb_addr)++;
+            yM = 2 * yN;
+          }
+          else
+          {
+            yM = yN;
+          }
+        }
+      }
+    }
+    else if (yN == 0 && p_Vid->DeblockCall == 2)
+    {
+      // for the deblocker if this is the extra edge then do this special stuff
+      pix->mb_addr   = currMB->mb_addr_up + 1;
+      pix->available = TRUE;
+      yM = yN - 1;
+    }
+    else
+    {
+      // inside the current macroblock
+      pix->mb_addr   = currMB->mbAddrX;
+      pix->available = TRUE;
+      yM = yN;
+    }
+  }
+  else
+  {
+    // ---- upper-right (xN >= maxW implies yN < 0 here) ----
+    if (cur_frame)
+    {
+      if (cur_top)
+      {
+        pix->mb_addr   = currMB->mb_addr_upper_right + 1;
+        pix->available = currMB->mb_avail_upper_right;
+        yM = yN;
+      }
+      // bottom frame MB: stays "not available"
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+      if (!cur_top)
+      {
+        // bottom field MB
+        pix->mb_addr += 1;
+        yM = yN;
+      }
+      else if (currMB->mb_avail_upper_right)
+      {
+        // top field MB
+        if (!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = 2 * yN;
+        }
+        else
+        {
+          yM = yN;
+        }
+      }
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (xN & (maxW - 1));
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = 16*pix->pos_x + pix->x;
+    pix->pos_y = 16*pix->pos_y + pix->y;
+  }
+}
+
+
+/*!
+ * \brief
+ *    Luma (16x16) neighbour lookup for callers that guarantee
+ *    0 <= xN < 16 and yN < 16, so no bounds checks are performed. Only the
+ *    "above" (yN < 0) and "inside" (yN >= 0) cases are handled.
+ * \param currMB
+ *    current macroblock
+ * \param xN
+ *    input x position, stored in pix->x without masking
+ * \param yN
+ *    input y position
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x, y, pos_x and pos_y
+ */
+void getAffNeighbourPXLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix)
+{ // xN >= 0, yN < 16, xN < 16
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxH = 16;
+  const int cur_frame = !currMB->mb_field;
+  const int cur_top   = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (yN < 0)
+  {
+    if (cur_frame && !cur_top)
+    {
+      // bottom frame MB: the top MB of the same pair
+      pix->mb_addr   = currMB->mbAddrX - 1;
+      pix->available = TRUE;
+      yM = yN;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+      if (cur_frame)
+      {
+        // for the deblocker if the current MB is a frame and the one above is a field
+        // then the neighbor is the top MB of the pair
+        if (currMB->mb_avail_up)
+        {
+          if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
+            pix->mb_addr += 1;
+        }
+        yM = yN;
+      }
+      else if (!cur_top)
+      {
+        // bottom field MB
+        pix->mb_addr += 1;
+        yM = yN;
+      }
+      else if (currMB->mb_avail_up)
+      {
+        // top field MB
+        if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = 2 * yN;
+        }
+        else
+        {
+          yM = yN;
+        }
+      }
+    }
+  }
+  else if (yN == 0 && p_Vid->DeblockCall == 2)
+  {
+    // for the deblocker if this is the extra edge then do this special stuff
+    pix->mb_addr   = currMB->mb_addr_up + 1;
+    pix->available = TRUE;
+    yM = yN - 1;
+  }
+  else
+  {
+    // inside the current macroblock
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+    yM = yN;
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (xN);
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = 16*pix->pos_x + pix->x;
+    pix->pos_y = 16*pix->pos_y + pix->y;
+  }
+}
+
+/*!
+ * \brief
+ *    Reduced variant of the luma "above / inside" neighbour lookup: it never
+ *    consults p_Vid->DeblockCall and fills in only mb_addr, available and
+ *    (when available) y. No x or picture position is computed.
+ * \param currMB
+ *    current macroblock
+ * \param yN
+ *    input y position (callers guarantee yN < 16)
+ * \param pix
+ *    receives mb_addr, available and y
+ */
+void getAffNeighbourPXLumaNB_NoPos(const Macroblock *currMB, int yN, PixelPos *pix)
+{ // xN >= 0, yN < 16, xN < 16, DeblockCall == 0
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int cur_frame = !currMB->mb_field;
+  const int cur_top   = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (yN >= 0)
+  {
+    // inside the current macroblock
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+    yM = yN;
+  }
+  else if (cur_frame && !cur_top)
+  {
+    // bottom frame MB: the top MB of the same pair
+    pix->mb_addr   = currMB->mbAddrX - 1;
+    pix->available = TRUE;
+    yM = yN;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mb_addr_up;
+    pix->available = currMB->mb_avail_up;
+    if (cur_frame)
+    {
+      // top frame MB: second MB of the pair above (when that pair exists)
+      if (currMB->mb_avail_up)
+      {
+        pix->mb_addr += 1;
+      }
+      yM = yN;
+    }
+    else if (!cur_top)
+    {
+      // bottom field MB
+      pix->mb_addr += 1;
+      yM = yN;
+    }
+    else if (currMB->mb_avail_up)
+    {
+      // top field MB
+      if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+      {
+        (pix->mb_addr)++;
+        yM = 2 * yN;
+      }
+      else
+      {
+        yM = yN;
+      }
+    }
+  }
+
+  if (pix->available)
+  {
+    pix->y = (short) (yM & 15);
+  }
+}
+
+
+/*!
+ * \brief
+ *    Luma (16x16) neighbour lookup for callers that guarantee yN >= 0:
+ *    only the "left" (xN < 0) and "inside" (0 <= xN < 16) cases exist.
+ *    Positions with yN > 15 or xN > 15 are reported as not available.
+ * \param currMB
+ *    current macroblock
+ * \param xN
+ *    input x position
+ * \param yN
+ *    input y position
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x, y, pos_x and pos_y
+ */
+void getAffNeighbourXPLuma(const Macroblock *currMB, int xN, int yN, PixelPos *pix)
+{ // yN >= 0
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = 16, maxH = 16;
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  // below or to the right of the macroblock: never available
+  if (yN > (maxH - 1) || xN > (maxW - 1))
+  {
+    return;
+  }
+
+  if (xN < 0)
+  {
+    // ---- left column ----
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+    if (currMB->mb_avail_left)
+    {
+      const int cur_frame  = !currMB->mb_field;
+      const int cur_top    = ((currMB->mbAddrX & 0x01) == 0);
+      const int left_frame = !p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+
+      if (left_frame == cur_frame)
+      {
+        // same coding type: same row, bottom MB maps to the second MB
+        if (!cur_top)
+        {
+          (pix->mb_addr)++;
+        }
+        yM = yN;
+      }
+      else if (cur_frame)
+      {
+        // frame MB next to a field pair: odd rows live in the second MB
+        (pix->mb_addr) += ((yN & 0x01) != 0);
+        yM = (cur_top ? yN : (yN + maxH)) >> 1;
+      }
+      else
+      {
+        // field MB next to a frame pair: rows are interleaved over the pair
+        yM = (yN << 1) + (cur_top ? 0 : 1);
+        if (yN >= (maxH >> 1))
+        {
+          (pix->mb_addr)++;
+          yM -= maxH;
+        }
+      }
+    }
+  }
+  else if (yN == 0 && p_Vid->DeblockCall == 2)
+  {
+    // for the deblocker if this is the extra edge then do this special stuff
+    pix->mb_addr   = currMB->mb_addr_up + 1;
+    pix->available = TRUE;
+    yM = yN - 1;
+  }
+  else
+  {
+    // inside the current macroblock
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+    yM = yN;
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (xN & (maxW - 1));
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = 16*pix->pos_x + pix->x;
+    pix->pos_y = 16*pix->pos_y + pix->y;
+  }
+}
+
+
+/*!
+ * \brief
+ *    Luma (16x16) lookup for a position the caller guarantees to be inside
+ *    the current macroblock (xN >= 0, yN >= 0): always available, always
+ *    the current macroblock address. No checks are performed.
+ * \param currMB
+ *    current macroblock
+ * \param xN
+ *    input x position (masked to 0..15)
+ * \param yN
+ *    input y position (masked to 0..15)
+ * \param pix
+ *    receives mb_addr, available, x, y, pos_x and pos_y
+ */
+void getAffNeighbourPPLumaNB(const Macroblock *currMB, int xN, int yN, PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  pix->available = TRUE;
+  pix->mb_addr   = currMB->mbAddrX;
+
+  // offsets inside the macroblock
+  pix->x = (short) (xN & 15);
+  pix->y = (short) (yN & 15);
+
+  // macroblock position scaled by 16, plus the offset inside it
+  get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+  pix->pos_x = 16*pix->pos_x + pix->x;
+  pix->pos_y = 16*pix->pos_y + pix->y;
+}
+
+/*!
+ * \brief
+ *    Luma (16x16) left-neighbour lookup: xN is fixed at -1 and the caller
+ *    guarantees yN >= 0. Positions with yN > 15 are reported as not
+ *    available.
+ * \param currMB
+ *    current macroblock
+ * \param yN
+ *    input y position
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x, y, pos_x and pos_y
+ */
+void getAffNeighbourNPLuma(const Macroblock *currMB, int yN, PixelPos *pix)
+{ // yN >= 0
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = 16, maxH = 16;
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (yN > (maxH - 1))
+  {
+    return;
+  }
+
+  // every case looks at the left macroblock pair
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+
+  if (currMB->mb_avail_left)
+  {
+    const int cur_frame  = !currMB->mb_field;
+    const int cur_top    = ((currMB->mbAddrX & 0x01) == 0);
+    const int left_frame = !p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+
+    if (left_frame == cur_frame)
+    {
+      // same coding type: same row, bottom MB maps to the second MB
+      if (!cur_top)
+      {
+        (pix->mb_addr)++;
+      }
+      yM = yN;
+    }
+    else if (cur_frame)
+    {
+      // frame MB next to a field pair: odd rows live in the second MB
+      (pix->mb_addr) += ((yN & 0x01) != 0);
+      yM = (cur_top ? yN : (yN + maxH)) >> 1;
+    }
+    else
+    {
+      // field MB next to a frame pair: rows are interleaved over the pair
+      yM = (yN << 1) + (cur_top ? 0 : 1);
+      if (yN >= (maxH >> 1))
+      {
+        (pix->mb_addr)++;
+        yM -= maxH;
+      }
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (-1 & (maxW - 1));
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = 16*pix->pos_x + pix->x;
+    pix->pos_y = 16*pix->pos_y + pix->y;
+  }
+}
+
+/*!
+ * \brief
+ *    Luma (16x16) left-neighbour lookup for the fixed position
+ *    xN = -1, yN = 0. All row computations are pre-evaluated for that
+ *    position.
+ * \param currMB
+ *    current macroblock
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x, y, pos_x and pos_y
+ */
+void getAffNeighbourN0Luma(const Macroblock *currMB, PixelPos *pix)
+{ // xN = -1 && yN == 0
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int yM = -1;
+
+  // every case looks at the left macroblock pair
+  pix->mb_addr   = currMB->mb_addr_left;
+  pix->available = currMB->mb_avail_left;
+
+  if (currMB->mb_avail_left)
+  {
+    // a top MB always maps to row 0 of the first MB of the left pair
+    yM = 0;
+
+    if ((currMB->mbAddrX & 0x01) != 0)
+    {
+      // bottom MB: the result depends on how the left pair is coded
+      const int left_frame = !p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+
+      if (!currMB->mb_field)
+      {
+        // frame
+        if (left_frame)
+        {
+          (pix->mb_addr)++;
+        }
+        else
+        {
+          yM = 8;
+        }
+      }
+      else
+      {
+        // field
+        if (left_frame)
+        {
+          yM = 1;
+        }
+        else
+        {
+          (pix->mb_addr)++;
+        }
+      }
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (-1 & 15);
+    pix->y = (short) (yM & 15);
+    get_mb_block_pos_mbaff(p_Vid->PicPos, pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = 16*pix->pos_x + pix->x;
+    pix->pos_y = 16*pix->pos_y + pix->y;
+  }
+}
+
+/*!
+ * \brief
+ *    Neighbour lookup for the fixed row yN = 0 with an arbitrary xN and an
+ *    arbitrary macroblock size: either the left neighbour (xN < 0) or a
+ *    position inside the current macroblock (0 <= xN < width).
+ * \param currMB
+ *    current macroblock
+ * \param xN
+ *    input x position
+ * \param mb_size
+ *    macroblock size in pixel: mb_size[0] is the width, mb_size[1] the height
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x, y, pos_x and pos_y
+ */
+void getAffNeighbourX0(const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  // row 0 does not exist (height < 1), or xN lies right of the macroblock
+  if (maxH < 1 || xN > (maxW - 1))
+  {
+    return;
+  }
+
+  if (xN < 0)
+  {
+    // ---- left neighbour ----
+    pix->mb_addr   = currMB->mb_addr_left;
+    pix->available = currMB->mb_avail_left;
+    if (currMB->mb_avail_left)
+    {
+      const int left_frame = !p_Vid->mb_data[currMB->mb_addr_left].mb_field;
+      const int cur_bottom = ((currMB->mbAddrX & 0x01) != 0);
+
+      if (!currMB->mb_field)
+      {
+        // frame
+        if (!cur_bottom)
+        {
+          yM = 0;
+        }
+        else if (left_frame)
+        {
+          (pix->mb_addr)++;
+          yM = 0;
+        }
+        else
+        {
+          yM = (0 + maxH) >> 1;
+        }
+      }
+      else if (left_frame)
+      {
+        // field MB next to a frame pair: row 0 for top, row 1 for bottom
+        if (0 < (maxH >> 1))
+        {
+          yM = cur_bottom;
+        }
+        else
+        {
+          (pix->mb_addr)++;
+          yM = cur_bottom - maxH;
+        }
+      }
+      else
+      {
+        // field MB next to a field pair
+        if (cur_bottom)
+        {
+          (pix->mb_addr)++;
+        }
+        yM = 0;
+      }
+    }
+  }
+  else if (p_Vid->DeblockCall == 2)
+  {
+    // for the deblocker if this is the extra edge then do this special stuff
+    pix->mb_addr   = currMB->mb_addr_up + 1;
+    pix->available = TRUE;
+    yM = 0 - 1;
+  }
+  else
+  {
+    // inside the current macroblock
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+    yM = 0;
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = (short) (xN & (maxW - 1));
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_x = pix->pos_x + pix->x;
+    pix->pos_y = pix->pos_y + pix->y;
+  }
+}
+
+/*!
+ * \brief
+ *    Neighbour lookup for the fixed column xN = 0 with an arbitrary yN and
+ *    an arbitrary macroblock size: either the neighbour above (yN < 0) or a
+ *    position inside the current macroblock (0 <= yN < height).
+ *
+ *    The "upper-right" branch can only execute when the width is not
+ *    positive; it is retained so results are unchanged for any input.
+ * \param currMB
+ *    current macroblock
+ * \param yN
+ *    input y position
+ * \param mb_size
+ *    macroblock size in pixel: mb_size[0] is the width, mb_size[1] the height
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x (always 0), y, pos_x and pos_y
+ */
+void getAffNeighbour0X(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix) // xN == 0, yN full range
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+  const int cur_frame = !currMB->mb_field;
+  const int cur_top   = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (yN > (maxH - 1))
+  {
+    return;
+  }
+  if (0 > (maxW - 1) && yN >= 0)
+  {
+    return;
+  }
+
+  if (yN >= 0)
+  {
+    // here 0 < maxW and 0 <= yN < maxH
+    if (yN == 0 && p_Vid->DeblockCall == 2)
+    {
+      // for the deblocker if this is the extra edge then do this special stuff
+      pix->mb_addr   = currMB->mb_addr_up + 1;
+      pix->available = TRUE;
+      yM = yN - 1;
+    }
+    else
+    {
+      // inside the current macroblock
+      pix->mb_addr   = currMB->mbAddrX;
+      pix->available = TRUE;
+      yM = yN;
+    }
+  }
+  else if (0 < maxW)
+  {
+    // ---- directly above ----
+    if (cur_frame && !cur_top)
+    {
+      // bottom frame MB: the top MB of the same pair
+      pix->mb_addr   = currMB->mbAddrX - 1;
+      pix->available = TRUE;
+      yM = yN;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+      if (cur_frame)
+      {
+        // for the deblocker if the current MB is a frame and the one above is a field
+        // then the neighbor is the top MB of the pair
+        if (currMB->mb_avail_up)
+        {
+          if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
+            pix->mb_addr += 1;
+        }
+        yM = yN;
+      }
+      else if (!cur_top)
+      {
+        // bottom field MB
+        pix->mb_addr += 1;
+        yM = yN;
+      }
+      else if (currMB->mb_avail_up)
+      {
+        // top field MB
+        if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = 2 * yN;
+        }
+        else
+        {
+          yM = yN;
+        }
+      }
+    }
+  }
+  else
+  {
+    // ---- xN >= maxW (width not positive), yN < 0: upper-right ----
+    if (cur_frame)
+    {
+      if (cur_top)
+      {
+        pix->mb_addr   = currMB->mb_addr_upper_right + 1;
+        pix->available = currMB->mb_avail_upper_right;
+        yM = yN;
+      }
+      // bottom frame MB: stays "not available"
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+      if (!cur_top)
+      {
+        // bottom field MB
+        pix->mb_addr += 1;
+        yM = yN;
+      }
+      else if (currMB->mb_avail_upper_right)
+      {
+        // top field MB
+        if (!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = 2 * yN;
+        }
+        else
+        {
+          yM = yN;
+        }
+      }
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = 0;
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_y = pix->pos_y + pix->y;
+  }
+}
+
+/*!
+ * \brief
+ *    Luma (16x16) neighbour lookup for the fixed column xN = 0 with an
+ *    arbitrary yN: either the neighbour above (yN < 0) or a position inside
+ *    the current macroblock (0 <= yN <= 15). Positions with yN > 15 are
+ *    reported as not available.
+ * \param currMB
+ *    current macroblock
+ * \param yN
+ *    input y position
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x (always 0), y, pos_x and pos_y
+ */
+void getAffNeighbour0XLuma(const Macroblock *currMB, int yN, PixelPos *pix) // xN == 0, yN full range
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxH = 16;
+  const int cur_frame = !currMB->mb_field;
+  const int cur_top   = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (yN > 15)
+  {
+    return;
+  }
+
+  if (yN < 0)
+  {
+    // ---- directly above ----
+    if (cur_frame && !cur_top)
+    {
+      // bottom frame MB: the top MB of the same pair
+      pix->mb_addr   = currMB->mbAddrX - 1;
+      pix->available = TRUE;
+      yM = yN;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+      if (cur_frame)
+      {
+        // for the deblocker if the current MB is a frame and the one above is a field
+        // then the neighbor is the top MB of the pair
+        if (currMB->mb_avail_up)
+        {
+          if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
+            pix->mb_addr += 1;
+        }
+        yM = yN;
+      }
+      else if (!cur_top)
+      {
+        // bottom field MB
+        pix->mb_addr += 1;
+        yM = yN;
+      }
+      else if (currMB->mb_avail_up)
+      {
+        // top field MB
+        if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = 2 * yN;
+        }
+        else
+        {
+          yM = yN;
+        }
+      }
+    }
+  }
+  else if (yN == 0 && p_Vid->DeblockCall == 2)
+  {
+    // for the deblocker if this is the extra edge then do this special stuff
+    pix->mb_addr   = currMB->mb_addr_up + 1;
+    pix->available = TRUE;
+    yM = yN - 1;
+  }
+  else
+  {
+    // 0 <= yN <= 15 is guaranteed here: inside the current macroblock
+    pix->mb_addr   = currMB->mbAddrX;
+    pix->available = TRUE;
+    yM = yN;
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    const int mb_size[2] = {16,16};
+    pix->x = 0;
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_y = pix->pos_y + pix->y;
+  }
+}
+
+
+
+/*!
+ * \brief
+ *    Neighbour lookup for the fixed position xN = 0, yN = -1 (directly
+ *    above the first column) with an arbitrary macroblock size.
+ *
+ *    The row stays at -1 except when a top field MB sits below a frame
+ *    coded pair, where it becomes -2. The "upper-right" branch only
+ *    executes when the width is not positive; it is retained so results are
+ *    unchanged for any input.
+ * \param currMB
+ *    current macroblock
+ * \param mb_size
+ *    macroblock size in pixel: mb_size[0] is the width, mb_size[1] the height
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x (always 0), y, pos_x and pos_y
+ */
+void getAffNeighbour0N(const Macroblock *currMB, const int mb_size[2], PixelPos *pix) // xN == 0, yN = -1
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxW = mb_size[0];
+  const int maxH = mb_size[1];
+  const int cur_frame = !currMB->mb_field;
+  const int cur_top   = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (0 < maxW)
+  {
+    // ---- directly above ----
+    if (cur_frame && !cur_top)
+    {
+      // bottom frame MB: the top MB of the same pair
+      pix->mb_addr   = currMB->mbAddrX - 1;
+      pix->available = TRUE;
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_up;
+      pix->available = currMB->mb_avail_up;
+      if (cur_frame)
+      {
+        // for the deblocker if the current MB is a frame and the one above is a field
+        // then the neighbor is the top MB of the pair
+        if (currMB->mb_avail_up)
+        {
+          if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
+            pix->mb_addr += 1;
+        }
+      }
+      else if (!cur_top)
+      {
+        // bottom field MB
+        pix->mb_addr += 1;
+      }
+      else if (currMB->mb_avail_up)
+      {
+        // top field MB below a frame coded pair: second MB, two rows up
+        if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = -2;
+        }
+      }
+    }
+  }
+  else
+  {
+    // ---- xN >= maxW (width not positive): upper-right ----
+    if (cur_frame)
+    {
+      if (cur_top)
+      {
+        pix->mb_addr   = currMB->mb_addr_upper_right + 1;
+        pix->available = currMB->mb_avail_upper_right;
+      }
+      // bottom frame MB: stays "not available"
+    }
+    else
+    {
+      pix->mb_addr   = currMB->mb_addr_upper_right;
+      pix->available = currMB->mb_avail_upper_right;
+      if (!cur_top)
+      {
+        // bottom field MB
+        pix->mb_addr += 1;
+      }
+      else if (currMB->mb_avail_upper_right)
+      {
+        // top field MB
+        if (!p_Vid->mb_data[currMB->mb_addr_upper_right].mb_field)
+        {
+          (pix->mb_addr)++;
+          yM = -2;
+        }
+      }
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    pix->x = 0;
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_y = pix->pos_y + pix->y;
+  }
+}
+
+/*!
+ * \brief
+ *    Luma (16x16) neighbour lookup for the fixed position xN = 0, yN = -1
+ *    (directly above the first column).
+ *
+ *    The row stays at -1 except when a top field MB sits below a frame
+ *    coded pair, where it becomes -2.
+ * \param currMB
+ *    current macroblock
+ * \param pix
+ *    receives mb_addr / available and, when available (or when
+ *    p_Vid->DeblockCall is non-zero), x (always 0), y, pos_x and pos_y
+ */
+void getAffNeighbour0NLuma(const Macroblock *currMB, PixelPos *pix) // xN == 0, yN = -1
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  const int maxH = 16;
+  const int cur_frame = !currMB->mb_field;
+  const int cur_top   = ((currMB->mbAddrX & 0x01) == 0);
+  int yM = -1;
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if (cur_frame && !cur_top)
+  {
+    // bottom frame MB: the top MB of the same pair
+    pix->mb_addr   = currMB->mbAddrX - 1;
+    pix->available = TRUE;
+  }
+  else
+  {
+    pix->mb_addr   = currMB->mb_addr_up;
+    pix->available = currMB->mb_avail_up;
+    if (cur_frame)
+    {
+      // for the deblocker if the current MB is a frame and the one above is a field
+      // then the neighbor is the top MB of the pair
+      if (currMB->mb_avail_up)
+      {
+        if (!(p_Vid->DeblockCall == 1 && (p_Vid->mb_data[currMB->mb_addr_up]).mb_field))
+          pix->mb_addr += 1;
+      }
+    }
+    else if (!cur_top)
+    {
+      // bottom field MB
+      pix->mb_addr += 1;
+    }
+    else if (currMB->mb_avail_up)
+    {
+      // top field MB below a frame coded pair: second MB, two rows up
+      if (!p_Vid->mb_data[currMB->mb_addr_up].mb_field)
+      {
+        (pix->mb_addr)++;
+        yM = -2;
+      }
+    }
+  }
+
+  if (pix->available || p_Vid->DeblockCall)
+  {
+    const int mb_size[2] = {16,16};
+    pix->x = 0;
+    pix->y = (short) (yM & (maxH - 1));
+    get_mb_pos(p_Vid, pix->mb_addr, mb_size, &(pix->pos_x), &(pix->pos_y));
+    pix->pos_y = pix->pos_y + pix->y;
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Get the neighbouring 4x4 block: runs the picture's getNeighbour
+*    callback and, when the neighbour is available, converts the returned
+*    coordinates to 4x4-block units by shifting them right by 2.
+* \param currMB
+*    current macroblock
+* \param block_x
+*    input x position, forwarded unchanged to getNeighbour
+* \param block_y
+*    input y position, forwarded unchanged to getNeighbour
+* \param mb_size
+*    Macroblock size in pixel (according to luma or chroma MB access)
+* \param pix
+*    returns position information; x, y, pos_x and pos_y are in 4x4-block
+*    units when pix->available is set
+************************************************************************
+*/
+void get4x4Neighbour(const Macroblock *currMB, int block_x, int block_y, const int mb_size[2], PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  p_Vid->getNeighbour(currMB, block_x, block_y, mb_size, pix);
+
+  if (!pix->available)
+  {
+    return;
+  }
+
+  // divide every coordinate by 4
+  pix->pos_x >>= 2;
+  pix->pos_y >>= 2;
+  pix->x >>= 2;
+  pix->y >>= 2;
+}
+
+/*!
+ * \brief
+ *    Luma variant of get4x4Neighbour(): runs the picture's getNeighbourLuma
+ *    callback and, when the neighbour is available, converts the returned
+ *    coordinates to 4x4-block units by shifting them right by 2.
+ * \param currMB
+ *    current macroblock
+ * \param block_x
+ *    input x position, forwarded unchanged to getNeighbourLuma
+ * \param block_y
+ *    input y position, forwarded unchanged to getNeighbourLuma
+ * \param pix
+ *    returns position information; x, y, pos_x and pos_y are in 4x4-block
+ *    units when pix->available is set
+ */
+void get4x4NeighbourLuma(const Macroblock *currMB, int block_x, int block_y, PixelPos *pix)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+
+  p_Vid->getNeighbourLuma(currMB, block_x, block_y, pix);
+
+  if (!pix->available)
+  {
+    return;
+  }
+
+  // divide every coordinate by 4
+  pix->pos_x >>= 2;
+  pix->pos_y >>= 2;
+  pix->x >>= 2;
+  pix->y >>= 2;
+}
diff --git a/Src/h264dec/ldecod/src/mb_prediction.c b/Src/h264dec/ldecod/src/mb_prediction.c
new file mode 100644
index 00000000..799236a8
--- /dev/null
+++ b/Src/h264dec/ldecod/src/mb_prediction.c
@@ -0,0 +1,979 @@
+/*!
+*************************************************************************************
+* \file mb_prediction.c
+*
+* \brief
+* Macroblock prediction functions
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Alexis Michael Tourapis <alexismt@ieee.org>
+*************************************************************************************
+*/
+
+#include "contributors.h"
+
+#include "block.h"
+#include "global.h"
+#include "mbuffer.h"
+#include "elements.h"
+#include "errorconcealment.h"
+#include "macroblock.h"
+#include "fmo.h"
+#include "cabac.h"
+#include "vlc.h"
+#include "image.h"
+#include "mb_access.h"
+#include "biaridecod.h"
+#include "transform8x8.h"
+#include "transform.h"
+#include "mc_prediction.h"
+#include "quant.h"
+#include "intra4x4_pred.h"
+#include "intra8x8_pred.h"
+#include "intra16x16_pred.h"
+#include "mv_prediction.h"
+#include "mb_prediction.h"
+#include "optim.h"
+
+
+/*!
+************************************************************************
+* \brief
+*    Reconstruct an intra 4x4 coded macroblock for one colour plane.
+*
+*    The sixteen 4x4 blocks are processed in the order listed in scan4x4.
+*    Every block is predicted, reconstructed and written to the picture
+*    before the next block is predicted: prediction might reference other
+*    parts of the image reconstructed during this macroblock, so a single
+*    16x16 image copy at the end is not possible.
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane whose coefficient / prediction / reconstruction buffers
+*    are used
+* \param image
+*    destination image for the reconstructed 4x4 blocks
+* \param dec_picture
+*    picture being decoded; its chroma_format_idc decides whether
+*    intra_cr_decoding() is called afterwards
+* \return
+*    SEARCH_SYNC as soon as intrapred() returns it, otherwise 1
+************************************************************************
+*/
+int mb_pred_intra4x4(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // (x, y) of every 4x4 block inside the macroblock, in 4x4-block units,
+  // in processing order; entry k belongs to coefficient block k
+  static const int scan4x4[16][2] =
+  {
+    {0,0}, {1,0}, {0,1}, {1,1},
+    {2,0}, {3,0}, {2,1}, {3,1},
+    {0,2}, {1,2}, {0,3}, {1,3},
+    {2,2}, {3,2}, {2,3}, {3,3}
+  };
+
+  Slice *currSlice = currMB->p_Slice;
+  int yuv = dec_picture->chroma_format_idc - 1;
+  int block_x = currMB->block_x;
+  int block_y = currMB->block_y;
+  int k;
+
+  if (currMB->is_lossless == FALSE)
+  {
+    const h264_short_block_t *blocks = currSlice->cof4[curr_plane];
+    const h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[curr_plane];
+    h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[curr_plane];
+
+    for (k = 0; k < 16; k++)
+    {
+      const int bx   = scan4x4[k][0];
+      const int by   = scan4x4[k][1];
+      const int ioff = bx << 2;   // pixel offset inside the macroblock
+      const int joff = by << 2;
+
+      if (intrapred(currMB, curr_plane, ioff, joff, block_x + bx, block_y + by) == SEARCH_SYNC)
+        return SEARCH_SYNC;
+      opt_itrans4x4(blocks[k], mb_pred, mb_rec, ioff, joff);
+      copy_image_data_4x4_stride(image, (block_x + bx) << 2, (block_y + by) << 2, currSlice->mb_rec[curr_plane], ioff, joff);
+    }
+  }
+  else
+  { // lossless
+    for (k = 0; k < 16; k++)
+    {
+      const int bx   = scan4x4[k][0];
+      const int by   = scan4x4[k][1];
+      const int ioff = bx << 2;   // pixel offset inside the macroblock
+      const int joff = by << 2;
+
+      if (intrapred(currMB, curr_plane, ioff, joff, block_x + bx, block_y + by) == SEARCH_SYNC)
+        return SEARCH_SYNC;
+      Inv_Residual_trans_4x4(currMB, curr_plane, ioff, joff);
+      copy_image_data_4x4_stride(image, (block_x + bx) << 2, (block_y + by) << 2, currSlice->mb_rec[curr_plane], ioff, joff);
+    }
+  }
+
+  // chroma decoding *******************************************************
+  if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444))
+  {
+    intra_cr_decoding(currMB, yuv);
+  }
+
+  return 1;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Reconstruct an intra 16x16 coded macroblock for one colour plane:
+*    run intrapred16x16() with currMB->i16mode, record that mode in
+*    currMB->ipmode_DPCM, call iMBtrans4x4() and finally, unless the
+*    picture is YUV400 or YUV444, intra_cr_decoding().
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane to reconstruct
+* \param image
+*    not referenced by this function
+* \param dec_picture
+*    picture being decoded (only chroma_format_idc is read here)
+* \return
+*    always 1
+************************************************************************
+*/
+int mb_pred_intra16x16(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  int yuv = dec_picture->chroma_format_idc - 1;
+
+  intrapred16x16(currMB, curr_plane, currMB->i16mode);
+
+  // remembered for residual DPCM
+  currMB->ipmode_DPCM = (char) currMB->i16mode;
+
+  // 4x4 inverse transform of the whole macroblock
+  iMBtrans4x4(currMB, curr_plane, 0);
+
+  // chroma is decoded here for every format except YUV400 and YUV444
+  if (!(dec_picture->chroma_format_idc == YUV400 || dec_picture->chroma_format_idc == YUV444))
+    intra_cr_decoding(currMB, yuv);
+
+  return 1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Reconstruct an intra 8x8 coded macroblock for one colour plane.
+*
+*    The four 8x8 blocks are handled in the order (0,0), (8,0), (0,8),
+*    (8,8). Each one is predicted, reconstructed (Inv_Residual_trans_8x8
+*    for lossless macroblocks, opt_itrans8x8 otherwise) and written to
+*    the picture before the next one is predicted.
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane to reconstruct
+* \param image
+*    destination image for the reconstructed 8x8 blocks
+* \param dec_picture
+*    picture being decoded; its chroma_format_idc decides whether
+*    intra_cr_decoding() is called afterwards
+* \return
+*    always 1
+************************************************************************
+*/
+int mb_pred_intra8x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  Slice *currSlice = currMB->p_Slice;
+  int yuv = dec_picture->chroma_format_idc - 1;
+  int b8;
+
+  if (!currMB->is_lossless)
+  {
+    h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[curr_plane];
+    h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[curr_plane];
+    h264_short_8x8block_t *mb_rres8 = currSlice->mb_rres8[curr_plane];
+
+    for (b8 = 0; b8 < 4; b8++)
+    {
+      const int ioff = (b8 & 1) << 3;    // 0, 8, 0, 8
+      const int joff = (b8 >> 1) << 3;   // 0, 0, 8, 8
+
+      intrapred8x8(currMB, curr_plane, ioff, joff);
+      // the row offset is applied to the buffers, the column offset is passed on
+      opt_itrans8x8(mb_rec + joff, mb_pred + joff, mb_rres8[b8], ioff);
+      copy_image_data_8x8_stride2(image, currMB->pix_x + ioff, currMB->pix_y + joff, currSlice->mb_rec[curr_plane], ioff, joff);
+    }
+  }
+  else
+  {
+    for (b8 = 0; b8 < 4; b8++)
+    {
+      const int ioff = (b8 & 1) << 3;    // 0, 8, 0, 8
+      const int joff = (b8 >> 1) << 3;   // 0, 0, 8, 8
+
+      intrapred8x8(currMB, curr_plane, ioff, joff);
+      Inv_Residual_trans_8x8(currMB, curr_plane, ioff, joff);
+      copy_image_data_8x8_stride2(image, currMB->pix_x + ioff, currMB->pix_y + joff, currSlice->mb_rec[curr_plane], ioff, joff);
+    }
+  }
+
+  // chroma is decoded here for every format except YUV400 and YUV444
+  if (!(dec_picture->chroma_format_idc == YUV400 || dec_picture->chroma_format_idc == YUV444))
+    intra_cr_decoding(currMB, yuv);
+
+  return 1;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Set chroma_vector_adjustment on the reference pictures of the lists
+*    used by the current macroblock, choose the list offset for MBAFF
+*    field macroblocks and update p_Vid->max_mb_vmv_r.
+*
+*    Non-MBAFF slice: for lists LIST_0..LIST_1 the adjustment is -2
+*    (current structure TOP_FIELD) or +2 (BOTTOM_FIELD) for references
+*    whose structure differs from the current one, and 0 otherwise.
+*    MBAFF field macroblock: *list_offset becomes 2 (even mbAddrX) or 4
+*    (odd mbAddrX); in the offset lists the adjustment is -2 for
+*    BOTTOM_FIELD references of an even macroblock, +2 for TOP_FIELD
+*    references of an odd macroblock, 0 otherwise.
+*    MBAFF frame macroblock: adjustment 0 in LIST_0..LIST_1,
+*    *list_offset is not written.
+* \param currMB
+*    current macroblock
+* \param list_offset
+*    written only for MBAFF field macroblocks; callers initialise it
+************************************************************************
+*/
+static void set_chroma_vector(Macroblock *currMB, int *list_offset)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int l, k;
+
+  if (currSlice->mb_aff_frame_flag)
+  {
+    const int is_odd_mb = (currMB->mbAddrX & 0x01);
+
+    if (currMB->mb_field)
+    {
+      // find out the correct list offsets
+      *list_offset = is_odd_mb ? 4 : 2;
+
+      for (l = LIST_0 + *list_offset; l <= (LIST_1 + *list_offset); l++)
+      {
+        for (k = 0; k < p_Vid->listXsize[l]; k++)
+        {
+          int adjust = 0;
+
+          if (!is_odd_mb && p_Vid->listX[l][k]->structure == BOTTOM_FIELD)
+            adjust = -2;
+          else if (is_odd_mb && p_Vid->listX[l][k]->structure == TOP_FIELD)
+            adjust = 2;
+
+          p_Vid->listX[l][k]->chroma_vector_adjustment = adjust;
+        }
+      }
+    }
+    else
+    {
+      for (l = LIST_0; l <= (LIST_1); l++)
+      {
+        for (k = 0; k < p_Vid->listXsize[l]; k++)
+          p_Vid->listX[l][k]->chroma_vector_adjustment = 0;
+      }
+    }
+  }
+  else
+  {
+    // non-zero only for field pictures; applied to references of the other structure
+    int field_adjust = 0;
+
+    if (p_Vid->structure == TOP_FIELD)
+      field_adjust = -2;
+    else if (p_Vid->structure == BOTTOM_FIELD)
+      field_adjust = 2;
+
+    for (l = LIST_0; l <= (LIST_1); l++)
+    {
+      for (k = 0; k < p_Vid->listXsize[l]; k++)
+      {
+        if (field_adjust != 0 && p_Vid->structure != p_Vid->listX[l][k]->structure)
+          p_Vid->listX[l][k]->chroma_vector_adjustment = field_adjust;
+        else
+          p_Vid->listX[l][k]->chroma_vector_adjustment = 0;
+      }
+    }
+  }
+
+  // the limit is halved for field pictures and for MBAFF field macroblocks
+  if (p_Vid->structure != FRAME || (currSlice->mb_aff_frame_flag && currMB->mb_field))
+    p_Vid->max_mb_vmv_r = p_Vid->max_vmv_r >> 1;
+  else
+    p_Vid->max_mb_vmv_r = p_Vid->max_vmv_r;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Skipped macroblock: run 16x16 motion compensation from LIST_0 and
+*    pass the prediction buffers to the image copy routines; no
+*    transform is invoked.
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane whose prediction buffer is passed to the luma copy
+* \param image
+*    destination for the 16x16 copy
+* \param dec_picture
+*    picture being decoded; its imgUV planes receive the chroma
+*    prediction for YUV420 (8x8) and YUV422 (8x16)
+************************************************************************
+*/
+void mb_pred_skip(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  Slice *currSlice = currMB->p_Slice;
+  // NOTE(review): not referenced directly below; kept exactly as declared before
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field));
+  int list_offset = 0;
+  int uv;
+
+  set_chroma_vector(currMB, &list_offset);
+
+  perform_mc16x16(currMB, curr_plane, dec_picture, LIST_0, list_offset, curr_mb_field);
+
+  opt_copy_image_data_16x16_stride(image, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[curr_plane]);
+
+  // chroma plane uv uses prediction buffer uv + 1
+  if (dec_picture->chroma_format_idc == YUV420)
+  {
+    for (uv = 0; uv < 2; uv++)
+      copy_image_data_8x8_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[uv + 1]);
+  }
+  else if (dec_picture->chroma_format_idc == YUV422)
+  {
+    for (uv = 0; uv < 2; uv++)
+      copy_image_data_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[uv + 1], 8, 16);
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Skipped macroblock variant that, after 16x16 motion compensation
+*    from LIST_0, calls iTransform() with its last argument set to 1
+*    instead of copying the prediction directly.
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane to process
+* \param image
+*    not referenced by this function
+* \param dec_picture
+*    picture being decoded, passed to perform_mc16x16()
+************************************************************************
+*/
+void mb_pred_sp_skip(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  Slice *slice = currMB->p_Slice;
+  int is_field_mb = ((slice->mb_aff_frame_flag)&&(currMB->mb_field));
+  int lx_offset = 0;
+
+  // may change lx_offset for MBAFF field macroblocks
+  set_chroma_vector(currMB, &lx_offset);
+
+  perform_mc16x16(currMB, curr_plane, dec_picture, LIST_0, lx_offset, is_field_mb);
+  iTransform(currMB, curr_plane, 1);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Inter prediction of a P macroblock that is split into four 8x8
+*    blocks, each with its own sub-partition mode (currMB->b8mode) and
+*    prediction direction (currMB->b8pdir), followed by iTransform().
+*
+*    Per 8x8 block:
+*    - SMB8x8: one perform_mc8x8() call
+*    - SMB4x4: four perform_mc() calls of BLOCK_SIZE x BLOCK_SIZE
+*    - any other mode: two perform_mc() calls; SMB8x4 uses scan entries
+*      k_start and k_start + 2 with an SMB_BLOCK_SIZE wide block, every
+*      remaining mode uses entries k_start and k_start + 1, with an
+*      SMB_BLOCK_SIZE tall block when the mode is SMB4x8
+*    The three branches are specialisations of one generic loop over
+*    decode_block_scan; they are kept apart so the common cases avoid
+*    the per-mode arithmetic.
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane to process
+* \param image
+*    not referenced by this function
+* \param dec_picture
+*    picture being decoded, passed to the motion compensation routines
+************************************************************************
+*/
+void mb_pred_p_inter8x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  int block8x8; // needed for ABT
+  int k;
+
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  int smb = p_Vid->type == SP_SLICE && IS_INTER(currMB);
+  int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field));
+
+  int list_offset = 0;
+
+  set_chroma_vector(currMB, &list_offset);
+
+  for (block8x8 = 0; block8x8 < 4; block8x8++)
+  {
+    int mv_mode  = currMB->b8mode[block8x8];
+    int pred_dir = currMB->b8pdir[block8x8];
+    int k_start  = (block8x8 << 2);   // first scan entry of this 8x8 block
+
+    if (mv_mode == SMB8x8)
+    {
+      int i = (decode_block_scan[k_start] & 3);
+      int j = block8x8 & ~1;          // 0 for blocks 0/1, 2 for blocks 2/3
+      perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field);
+    }
+    else if (mv_mode == SMB4x4)
+    {
+      // all four 4x4 sub-blocks, one scan entry each
+      for (k = k_start; k < k_start + 4; k++)
+      {
+        int i = (decode_block_scan[k] & 3);
+        int j = ((decode_block_scan[k] >> 2) & 3);
+        perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, BLOCK_SIZE, BLOCK_SIZE, curr_mb_field);
+      }
+    }
+    else
+    {
+      int k_inc = (mv_mode == SMB8x4) ? 2 : 1;
+      int k_end = k_start + k_inc + 1;   // exactly two iterations
+
+      int block_size_x = (mv_mode == SMB8x4) ? SMB_BLOCK_SIZE : BLOCK_SIZE;
+      int block_size_y = (mv_mode == SMB4x8) ? SMB_BLOCK_SIZE : BLOCK_SIZE;
+
+      for (k = k_start; k < k_end; k += k_inc)
+      {
+        int i = (decode_block_scan[k] & 3);
+        int j = ((decode_block_scan[k] >> 2) & 3);
+        perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, block_size_x, block_size_y, curr_mb_field);
+      }
+    }
+  }
+
+  iTransform(currMB, curr_plane, smb);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Inter prediction of a 16x16 partitioned macroblock: one
+*    perform_mc16x16() call using currMB->b8pdir[0], then iTransform().
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane to process
+* \param image
+*    not referenced by this function
+* \param dec_picture
+*    picture being decoded, passed to perform_mc16x16()
+************************************************************************
+*/
+void mb_pred_p_inter16x16(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // last argument of iTransform(): non-zero when the picture type is SP_SLICE
+  int is_sp = (currMB->p_Vid->type == SP_SLICE);
+  Slice *slice = currMB->p_Slice;
+  int is_field_mb = ((slice->mb_aff_frame_flag)&&(currMB->mb_field));
+  int lx_offset = 0;
+
+  set_chroma_vector(currMB, &lx_offset);
+
+  perform_mc16x16(currMB, curr_plane, dec_picture, currMB->b8pdir[0], lx_offset, is_field_mb);
+
+  iTransform(currMB, curr_plane, is_sp);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Inter prediction of a 16x8 partitioned macroblock: two
+*    perform_mc16x8() calls, at block position (0,0) with b8pdir[0] and
+*    at (0,2) with b8pdir[2], then iTransform().
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane to process
+* \param image
+*    not referenced by this function
+* \param dec_picture
+*    picture being decoded, passed to perform_mc16x8()
+************************************************************************
+*/
+void mb_pred_p_inter16x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // last argument of iTransform(): non-zero when the picture type is SP_SLICE
+  int is_sp = (currMB->p_Vid->type == SP_SLICE);
+  Slice *slice = currMB->p_Slice;
+  int is_field_mb = ((slice->mb_aff_frame_flag)&&(currMB->mb_field));
+  int lx_offset = 0;
+
+  set_chroma_vector(currMB, &lx_offset);
+
+  // upper partition, then lower partition
+  perform_mc16x8(currMB, curr_plane, dec_picture, currMB->b8pdir[0], 0, 0, lx_offset, is_field_mb);
+  perform_mc16x8(currMB, curr_plane, dec_picture, currMB->b8pdir[2], 0, 2, lx_offset, is_field_mb);
+
+  iTransform(currMB, curr_plane, is_sp);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Inter prediction of an 8x16 partitioned macroblock: two
+*    perform_mc8x16() calls, at block position (0,0) with b8pdir[0] and
+*    at (2,0) with b8pdir[1], then iTransform().
+* \param currMB
+*    current macroblock
+* \param curr_plane
+*    colour plane to process
+* \param image
+*    not referenced by this function
+* \param dec_picture
+*    picture being decoded, passed to perform_mc8x16()
+************************************************************************
+*/
+void mb_pred_p_inter8x16(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+  // last argument of iTransform(): non-zero when the picture type is SP_SLICE
+  int is_sp = (currMB->p_Vid->type == SP_SLICE);
+  Slice *slice = currMB->p_Slice;
+  int is_field_mb = ((slice->mb_aff_frame_flag)&&(currMB->mb_field));
+  int lx_offset = 0;
+
+  set_chroma_vector(currMB, &lx_offset);
+
+  // left partition, then right partition
+  perform_mc8x16(currMB, curr_plane, dec_picture, currMB->b8pdir[0], 0, 0, lx_offset, is_field_mb);
+  perform_mc8x16(currMB, curr_plane, dec_picture, currMB->b8pdir[1], 2, 0, lx_offset, is_field_mb);
+
+  iTransform(currMB, curr_plane, is_sp);
+}
+
+void mb_pred_b_dtemporal(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{ /*!
+   * \brief
+   *    Temporal-direct prediction of a B macroblock. For each of the four
+   *    8x8 blocks the list-0 / list-1 motion vectors and reference indices
+   *    of every sub-block are derived from the co-located motion data
+   *    (currSlice->p_colocated) and stored in dec_picture->motion, then
+   *    motion compensation is run for that 8x8 block. Afterwards, when
+   *    currMB->cbp is 0 the prediction buffers are handed to the image
+   *    copy routines (chroma only if the format is neither YUV400 nor
+   *    YUV444); otherwise iTransform() is called with 0.
+   * \param currMB       current macroblock
+   * \param curr_plane   colour plane to process
+   * \param image        destination of the 16x16 copy in the cbp == 0 case
+   * \param dec_picture  picture being decoded; its motion field is written
+   *                     and its ref_pic_num table is read
+   */
+ short ref_idx;
+ int refList;
+
+ PicMotionParams *motion = &dec_picture->motion;
+ int k;
+ int block8x8; // needed for ABT
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field));
+
+ MotionParams *colocated = &currSlice->p_colocated->frame; // default; replaced below for field macroblocks
+ int list_offset = 0;
+
+ set_chroma_vector(currMB, &list_offset);
+
+ if (currMB->mb_field) // field MB: odd mbAddrX uses the bottom co-located data, even mbAddrX the top
+ {
+ if(currMB->mbAddrX & 0x01)
+ {
+ colocated = &currSlice->p_colocated->bottom;
+ }
+ else
+ {
+ colocated = &currSlice->p_colocated->top;
+ }
+ }
+
+ for (block8x8=0; block8x8<4; block8x8++)
+ {
+ int pred_dir = currMB->b8pdir[block8x8];
+
+ int k_start = (block8x8 << 2); // first decode_block_scan entry of this 8x8 block
+ int k_end = k_start; // becomes the exclusive upper bound of the motion compensation loop
+
+ if (p_Vid->active_sps->direct_8x8_inference_flag)
+ {
+ k_end ++; // one perform_mc8x8() call for the whole 8x8 block
+ }
+ else
+ {
+ k_end += BLOCK_MULTIPLE; // one 4x4 perform_mc() call per scan entry
+ }
+
+ for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++) // motion derivation always visits every scan entry, independent of k_end
+ {
+
+ int i = (decode_block_scan[k] & 3);
+ int j = ((decode_block_scan[k] >> 2) & 3);
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int j6 = currMB->block_y_aff + j; // row used to index the co-located arrays (j4 indexes the current picture)
+ assert (pred_dir<=2);
+
+ refList = (colocated->motion[LIST_0][j6][i4].ref_idx== -1 ? LIST_1 : LIST_0); // take co-located list 0 unless its ref_idx is -1
+ ref_idx = colocated->motion[refList][j6][i4].ref_idx;
+
+ if(ref_idx==-1) // co-located is intra mode
+ {
+ memset( &motion->motion[LIST_0][j4][i4].mv, 0, sizeof(MotionVector));
+ memset( &motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector));
+
+ motion->motion[LIST_0][j4][i4].ref_idx = 0;
+ motion->motion[LIST_1][j4][i4].ref_idx = 0;
+ }
+ else // co-located skip or inter mode
+ {
+ int mapped_idx=0; // stays 0 if the search loop below never executes its body
+ int iref;
+
+ for (iref=0;iref<imin(currSlice->num_ref_idx_l0_active,p_Vid->listXsize[LIST_0 + list_offset]);iref++) // look for the list entry whose id equals the co-located ref_pic_id
+ {
+ if(p_Vid->structure==0 && curr_mb_field==0)
+ {
+ // If the current MB is a frame MB and the colocated is from a field picture,
+ // then the colocated->ref_pic_id may have been generated from the wrong value of
+ // frame_poc if it references it's complementary field, so test both POC values
+ if(p_Vid->listX[0][iref]->top_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id || p_Vid->listX[0][iref]->bottom_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id)
+ {
+ mapped_idx=iref;
+ break;
+ }
+ else //! invalid index. Default to zero even though this case should not happen
+ mapped_idx=INVALIDINDEX;
+ continue; // in this case the ref_pic_num comparison below is never reached
+ }
+
+ if (dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][iref]==colocated->motion[refList][j6][i4].ref_pic_id)
+ {
+ mapped_idx=iref;
+ break;
+ }
+ else //! invalid index. Default to zero even though this case should not happen
+ {
+ mapped_idx=INVALIDINDEX;
+ }
+ }
+ if (INVALIDINDEX == mapped_idx) // the loop ran and no entry matched
+ {
+ error("temporal direct error: colocated block has ref that is unavailable",-1111); // NOTE(review): if error() returns, this sub-block's ref_idx is not written before it is used as an index below - confirm
+ }
+ else
+ {
+ int mv_scale = currSlice->mvscale[LIST_0 + list_offset][mapped_idx];
+
+ //! In such case, an array is needed for each different reference.
+ if (mv_scale == 9999 || p_Vid->listX[LIST_0+list_offset][mapped_idx]->is_long_term) // no scaling: list 0 takes the co-located vector as is, list 1 is zeroed
+ {
+ memcpy(&motion->motion[LIST_0][j4][i4].mv, &colocated->motion[refList][j6][i4].mv, sizeof(MotionVector));
+ memset(&motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector));
+ }
+ else // list 0 = (mv_scale * co-located mv + 128) >> 8, list 1 = list 0 - co-located mv
+ {
+ motion->motion[LIST_0][j4][i4].mv[0]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[0] + 128 ) >> 8);
+ motion->motion[LIST_0][j4][i4].mv[1]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[1] + 128 ) >> 8);
+
+ motion->motion[LIST_1][j4][i4].mv[0]= (short) (motion->motion[LIST_0][j4][i4].mv[0] - colocated->motion[refList][j6][i4].mv[0]);
+ motion->motion[LIST_1][j4][i4].mv[1]= (short) (motion->motion[LIST_0][j4][i4].mv[1] - colocated->motion[refList][j6][i4].mv[1]);
+ }
+
+ motion->motion[LIST_0][j4][i4].ref_idx = (char) mapped_idx; //p_Vid->listX[1][0]->ref_idx[refList][j4][i4];
+ motion->motion[LIST_1][j4][i4].ref_idx = 0;
+ }
+ }
+ // store reference picture ID determined by direct mode
+ motion->motion[LIST_0][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][(short)motion->motion[LIST_0][j4][i4].ref_idx];
+ motion->motion[LIST_1][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset][(short)motion->motion[LIST_1][j4][i4].ref_idx];
+ }
+ for (k = k_start; k < k_end; k ++) // motion compensation over [k_start, k_end)
+ {
+ int i = (decode_block_scan[k] & 3);
+ int j = ((decode_block_scan[k] >> 2) & 3);
+ if (p_Vid->active_sps->direct_8x8_inference_flag)
+ perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field);
+ else
+ perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, 4, 4, curr_mb_field);
+ }
+ }
+
+ if (currMB->cbp == 0) // iTransform() is skipped; the prediction buffers are passed straight to the copy routines
+ {
+ opt_copy_image_data_16x16_stride(image, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[curr_plane]);
+
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444))
+ {
+ copy_image_data_stride(dec_picture->imgUV[0], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[1], p_Vid->mb_size[IS_CHROMA][0], p_Vid->mb_size[IS_CHROMA][1]);
+ copy_image_data_stride(dec_picture->imgUV[1], currMB->pix_c_x, currMB->pix_c_y, currSlice->mb_pred[2], p_Vid->mb_size[IS_CHROMA][0], p_Vid->mb_size[IS_CHROMA][1]);
+ }
+ }
+ else
+ iTransform(currMB, curr_plane, 0);
+}
+
+
+void mb_pred_b_inter8x8(Macroblock *currMB, ColorPlane curr_plane, VideoImage *image, StorablePicture *dec_picture)
+{
+ short ref_idx;
+ int refList;
+
+ char l0_rFrame = -1, l1_rFrame = -1;
+ PicMotionParams *motion = &dec_picture->motion;
+ short pmvl0[2]={0,0}, pmvl1[2]={0,0};
+ int block_size_x, block_size_y;
+ int k;
+ int block8x8; // needed for ABT
+ Slice *currSlice = currMB->p_Slice;
+ VideoParameters *p_Vid = currMB->p_Vid;
+ int curr_mb_field = ((currSlice->mb_aff_frame_flag)&&(currMB->mb_field));
+
+ MotionParams *colocated = &currSlice->p_colocated->frame;
+ int list_offset = 0;
+
+ set_chroma_vector(currMB, &list_offset);
+
+ if (currMB->mb_field)
+ {
+ if(currMB->mbAddrX & 0x01)
+ {
+ colocated = &currSlice->p_colocated->bottom;
+ }
+ else
+ {
+ colocated = &currSlice->p_colocated->top;
+ }
+ }
+
+ // prepare direct modes
+ if (currSlice->direct_spatial_mv_pred_flag && (!(currMB->b8mode[0] && currMB->b8mode[1] && currMB->b8mode[2] && currMB->b8mode[3])))
+ prepare_direct_params(currMB, dec_picture, pmvl0, pmvl1, &l0_rFrame, &l1_rFrame);
+
+ for (block8x8=0; block8x8<4; block8x8++)
+ {
+ int mv_mode = currMB->b8mode[block8x8];
+ int pred_dir = currMB->b8pdir[block8x8];
+
+ if ( mv_mode == SMB8x8)
+ {
+ int i = (decode_block_scan[block8x8*4] & 3);
+ int j = ((decode_block_scan[block8x8*4] >> 2) & 3);
+ perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field);
+ }
+ else if ( mv_mode == SMB4x4)
+ {
+ int k_start = (block8x8 << 2);
+
+ for (k = k_start; k < k_start + 4; k ++)
+ {
+ int i = (decode_block_scan[k] & 3);
+ int j = ((decode_block_scan[k] >> 2) & 3);
+ perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, BLOCK_SIZE, BLOCK_SIZE, curr_mb_field);
+ }
+ }
+ else if ( mv_mode != BSKIP_DIRECT)
+ {
+ int k_start = (block8x8 << 2);
+ int k_inc = (mv_mode == SMB8x4) ? 2 : 1;
+ int k_end = (k_start + k_inc + 1);
+
+ block_size_x = ( mv_mode == SMB8x4) ? SMB_BLOCK_SIZE : BLOCK_SIZE;
+ block_size_y = ( mv_mode == SMB4x8) ? SMB_BLOCK_SIZE : BLOCK_SIZE;
+
+ for (k = k_start; k < k_end; k += k_inc)
+ {
+ int i = (decode_block_scan[k] & 3);
+ int j = ((decode_block_scan[k] >> 2) & 3);
+ perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, block_size_x, block_size_y, curr_mb_field);
+ }
+ }
+ else
+ {
+ int k_start = (block8x8 << 2);
+
+ // Prepare mvs (needed for deblocking and mv prediction
+ if (currSlice->direct_spatial_mv_pred_flag)
+ {
+ h264_ref_t *ref_pic_num_l0 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset];
+ h264_ref_t *ref_pic_num_l1 = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset];
+
+ for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++)
+ {
+ int i = (decode_block_scan[k] & 3);
+ int j = ((decode_block_scan[k] >> 2) & 3);
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int j6 = currMB->block_y_aff + j;
+
+ assert (pred_dir<=2);
+ //===== DIRECT PREDICTION =====
+
+ if (l0_rFrame >=0)
+ {
+ if (!l0_rFrame && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+ {
+ motion->motion[LIST_0][j4][i4].mv[0] = 0;
+ motion->motion[LIST_0][j4][i4].mv[1] = 0;
+ motion->motion[LIST_0][j4][i4].ref_idx = 0;
+ }
+ else
+ {
+ motion->motion[LIST_0][j4][i4].mv[0] = pmvl0[0];
+ motion->motion[LIST_0][j4][i4].mv[1] = pmvl0[1];
+ motion->motion[LIST_0][j4][i4].ref_idx = l0_rFrame;
+ }
+ }
+ else
+ {
+ motion->motion[LIST_0][j4][i4].ref_idx = -1;
+ motion->motion[LIST_0][j4][i4].mv[0] = 0;
+ motion->motion[LIST_0][j4][i4].mv[1] = 0;
+ }
+
+ if (l1_rFrame >=0)
+ {
+ if (l1_rFrame==0 && ((!colocated->moving_block[j6][i4]) && (!p_Vid->listX[LIST_1 + list_offset][0]->is_long_term)))
+ {
+ motion->motion[LIST_1][j4][i4].mv[0] = 0;
+ motion->motion[LIST_1][j4][i4].mv[1] = 0;
+ motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame;
+ }
+ else
+ {
+ motion->motion[LIST_1][j4][i4].mv[0] = pmvl1[0];
+ motion->motion[LIST_1][j4][i4].mv[1] = pmvl1[1];
+ motion->motion[LIST_1][j4][i4].ref_idx = l1_rFrame;
+ }
+ }
+ else
+ {
+ motion->motion[LIST_1][j4][i4].mv[0] = 0;
+ motion->motion[LIST_1][j4][i4].mv[1] = 0;
+ motion->motion[LIST_1][j4][i4].ref_idx = -1;
+ }
+
+ if (l0_rFrame < 0 && l1_rFrame < 0)
+ {
+ motion->motion[LIST_0][j4][i4].ref_idx = 0;
+ motion->motion[LIST_1][j4][i4].ref_idx = 0;
+ }
+
+ if (motion->motion[LIST_1][j4][i4].ref_idx==-1)
+ {
+ pred_dir = 0;
+ ref_idx = (motion->motion[LIST_0][j4][i4].ref_idx != -1) ? motion->motion[LIST_0][j4][i4].ref_idx : 0;
+ }
+ else if (motion->motion[LIST_0][j4][i4].ref_idx==-1)
+ {
+ pred_dir = 1;
+ ref_idx = (motion->motion[LIST_1][j4][i4].ref_idx != -1) ? motion->motion[LIST_1][j4][i4].ref_idx : 0;
+ }
+ else
+ pred_dir = 2;
+
+ motion->motion[LIST_0][j4][i4].ref_pic_id = ref_pic_num_l0[(short)motion->motion[LIST_0][j4][i4].ref_idx];
+ motion->motion[LIST_1][j4][i4].ref_pic_id = ref_pic_num_l1[(short)motion->motion[LIST_1][j4][i4].ref_idx];
+ }
+ }
+ else
+ {
+ for (k = k_start; k < k_start + BLOCK_MULTIPLE; k ++)
+ {
+ int i = (decode_block_scan[k] & 3);
+ int j = ((decode_block_scan[k] >> 2) & 3);
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int j6 = currMB->block_y_aff + j;
+
+ assert (pred_dir<=2);
+
+ refList = (colocated->motion[LIST_0][j6][i4].ref_idx== -1 ? LIST_1 : LIST_0);
+ ref_idx = colocated->motion[refList][j6][i4].ref_idx;
+
+ if(ref_idx==-1) // co-located is intra mode
+ {
+ memset( &motion->motion[LIST_0][j4][i4].mv, 0, sizeof(MotionVector));
+ memset( &motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector));
+
+ motion->motion[LIST_0][j4][i4].ref_idx = 0;
+ motion->motion[LIST_1][j4][i4].ref_idx = 0;
+ }
+ else // co-located skip or inter mode
+ {
+ int mapped_idx=0;
+ int iref;
+
+ for (iref=0;iref<imin(currSlice->num_ref_idx_l0_active,p_Vid->listXsize[LIST_0 + list_offset]);iref++)
+ {
+ if(p_Vid->structure==0 && curr_mb_field==0)
+ {
+ // If the current MB is a frame MB and the colocated is from a field picture,
+ // then the colocated->ref_pic_id may have been generated from the wrong value of
+ // frame_poc if it references it's complementary field, so test both POC values
+ if(p_Vid->listX[0][iref]->top_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id || p_Vid->listX[0][iref]->bottom_poc*2 == colocated->motion[refList][j6][i4].ref_pic_id)
+ {
+ mapped_idx=iref;
+ break;
+ }
+ else //! invalid index. Default to zero even though this case should not happen
+ mapped_idx=INVALIDINDEX;
+ continue;
+ }
+
+ if (dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][iref]==colocated->motion[refList][j6][i4].ref_pic_id)
+ {
+ mapped_idx=iref;
+ break;
+ }
+ else //! invalid index. Default to zero even though this case should not happen
+ {
+ mapped_idx=INVALIDINDEX;
+ }
+ }
+ if (INVALIDINDEX == mapped_idx)
+ {
+ error("temporal direct error: colocated block has ref that is unavailable",-1111);
+ }
+ else
+ {
+ int mv_scale = currSlice->mvscale[LIST_0 + list_offset][mapped_idx];
+
+ //! In such case, an array is needed for each different reference.
+ if (mv_scale == 9999 || p_Vid->listX[LIST_0+list_offset][mapped_idx]->is_long_term)
+ {
+ memcpy(&motion->motion[LIST_0][j4][i4].mv, &colocated->motion[refList][j6][i4].mv, sizeof(MotionVector));
+ memset(&motion->motion[LIST_1][j4][i4].mv, 0, sizeof(MotionVector));
+ }
+ else
+ {
+ motion->motion[LIST_0][j4][i4].mv[0]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[0] + 128 ) >> 8);
+ motion->motion[LIST_0][j4][i4].mv[1]= (short) ((mv_scale * colocated->motion[refList][j6][i4].mv[1] + 128 ) >> 8);
+
+ motion->motion[LIST_1][j4][i4].mv[0]= (short) (motion->motion[LIST_0][j4][i4].mv[0] - colocated->motion[refList][j6][i4].mv[0]);
+ motion->motion[LIST_1][j4][i4].mv[1]= (short) (motion->motion[LIST_0][j4][i4].mv[1] - colocated->motion[refList][j6][i4].mv[1]);
+ }
+
+ motion->motion[LIST_0][j4][i4].ref_idx = (char) mapped_idx; //p_Vid->listX[1][0]->ref_idx[refList][j4][i4];
+ motion->motion[LIST_1][j4][i4].ref_idx = 0;
+ }
+ }
+ // store reference picture ID determined by direct mode
+ motion->motion[LIST_0][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_0 + list_offset][(short)motion->motion[LIST_0][j4][i4].ref_idx];
+ motion->motion[LIST_1][j4][i4].ref_pic_id = dec_picture->ref_pic_num[p_Vid->current_slice_nr][LIST_1 + list_offset][(short)motion->motion[LIST_1][j4][i4].ref_idx];
+ }
+ }
+
+ if (p_Vid->active_sps->direct_8x8_inference_flag)
+ {
+ int i = (decode_block_scan[k_start] & 3);
+ int j = ((decode_block_scan[k_start] >> 2) & 3);
+ perform_mc8x8(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, curr_mb_field);
+ }
+ else
+ {
+ for (k = k_start; k < k_start+BLOCK_MULTIPLE; k ++)
+ {
+ int i = (decode_block_scan[k] & 3);
+ int j = ((decode_block_scan[k] >> 2) & 3);
+ perform_mc(currMB, curr_plane, dec_picture, pred_dir, i, j, list_offset, BLOCK_SIZE, BLOCK_SIZE, curr_mb_field);
+ }
+ }
+ }
+ }
+
+ iTransform(currMB, curr_plane, 0);
+}
+
+/*!
+************************************************************************
+* \brief
+* Copy IPCM coefficients to decoded picture buffer and set parameters for this MB
+* (for IPCM CABAC and IPCM CAVLC 28/11/2003)
+*
+* \author
+* Dong Wang <Dong.Wang@bristol.ac.uk>
+************************************************************************
+*/
+void set_chroma_qp(Macroblock* currMB);
+/* Store qp in the macroblock, derive the scaled luma QP from it, call
+ * set_chroma_qp() and recompute the lossless flag. */
+static inline void update_qp(Macroblock *currMB, int qp)
+{
+  VideoParameters *vid = currMB->p_Vid;
+
+  currMB->qp = qp;
+  currMB->qp_scaled[0] = qp + vid->bitdepth_luma_qp_scale;
+  set_chroma_qp(currMB);
+
+  // lossless only when the scaled luma QP is zero and lossless_qpprime_flag is 1
+  if ((currMB->qp_scaled[0] == 0) && (vid->lossless_qpprime_flag == 1))
+    currMB->is_lossless = (Boolean) 1;
+  else
+    currMB->is_lossless = (Boolean) 0;
+}
+
+void mb_pred_ipcm(Macroblock *currMB)
+{
+  Slice *currSlice = currMB->p_Slice;
+  VideoParameters *p_Vid = currMB->p_Vid;
+  StorablePicture *dec_picture = p_Vid->dec_picture;
+  int row, col, plane;
+
+  // Luma: copy the MB_BLOCK_SIZE x MB_BLOCK_SIZE samples held in currSlice->ipcm[0]
+  // into the decoded picture at the macroblock's pixel position.
+  for (row = 0; row < MB_BLOCK_SIZE; row++)
+  {
+    for (col = 0; col < MB_BLOCK_SIZE; col++)
+    {
+      dec_picture->imgY->img[currMB->pix_y + row][currMB->pix_x + col] = (imgpel) currSlice->ipcm[0][row][col];
+    }
+  }
+
+  // Chroma: both planes come from ipcm[1] and ipcm[2]; skipped for YUV400
+  // and when IS_INDEPENDENT(p_Vid) is true.
+  if ((dec_picture->chroma_format_idc != YUV400) && !IS_INDEPENDENT(p_Vid))
+  {
+    for (plane = 0; plane < 2; plane++)
+    {
+      for (row = 0; row < p_Vid->mb_cr_size_y; row++)
+      {
+        for (col = 0; col < p_Vid->mb_cr_size_x; col++)
+        {
+          dec_picture->imgUV[plane]->img[currMB->pix_c_y+row][currMB->pix_c_x + col] = (imgpel) currSlice->ipcm[plane + 1][row][col];
+        }
+      }
+    }
+  }
+
+  // deblocking filter: QP is forced to 0 and every bit of cbp_blk[0] is set
+  update_qp(currMB, 0);
+  currMB->cbp_blk[0] = 0xFFFF;
+
+  // CAVLC: neighbouring blocks read nz_coeff, so fill this MB's entry with 16
+  memset(&p_Vid->nz_coeff[currMB->mbAddrX][0][0][0], 16, sizeof(h264_nz_coefficient));
+
+  // CABAC: state used when decoding the MB skip flag and Dquant
+  currMB->skip_flag = 0;
+  currSlice->last_dquant = 0;
+}
+
diff --git a/Src/h264dec/ldecod/src/mbuffer.c b/Src/h264dec/ldecod/src/mbuffer.c
new file mode 100644
index 00000000..9784d1ef
--- /dev/null
+++ b/Src/h264dec/ldecod/src/mbuffer.c
@@ -0,0 +1,4409 @@
+
+/*!
+ ***********************************************************************
+ * \file
+ * mbuffer.c
+ *
+ * \brief
+ * Frame buffer functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Sühring <suehring@hhi.de>
+ * - Alexis Tourapis <alexismt@ieee.org>
+ * - Jill Boyce <jill.boyce@thomson.net>
+ * - Saurav K Bandyopadhyay <saurav@ieee.org>
+ * - Zhenyu Wu <Zhenyu.Wu@thomson.net>
+ * - Purvin Pandit <Purvin.Pandit@thomson.net>
+ *
+ ***********************************************************************
+ */
+
+#include <limits.h>
+
+#include "global.h"
+#include "erc_api.h"
+#include "header.h"
+#include "image.h"
+#include "mbuffer.h"
+#include "memalloc.h"
+#include "output.h"
+
+
+
+static void insert_picture_in_dpb (VideoParameters *p_Vid, FrameStore* fs, StorablePicture* p);
+static void output_one_frame_from_dpb(VideoParameters *p_Vid);
+static void get_smallest_poc (DecodedPictureBuffer *p_Dpb, int *poc,int * pos);
+static void gen_field_ref_ids (StorablePicture *p);
+static int remove_unused_frame_from_dpb (VideoParameters *p_Vid, DecodedPictureBuffer *p_Dpb);
+static int is_used_for_reference (FrameStore* fs);
+static int is_short_term_reference (FrameStore* fs);
+static int is_long_term_reference (FrameStore* fs);
+
+#define MAX_LIST_SIZE 33
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Print out list of pictures in DPB. Used for debug purposes.
+ *    Compiles to an empty function unless DUMP_DPB is non-zero.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer whose first used_size frame stores are printed
+ ************************************************************************
+ */
+static void dump_dpb(DecodedPictureBuffer *p_Dpb)
+{
+#if DUMP_DPB
+  unsigned i;
+
+  for (i=0; i<p_Dpb->used_size;i++)
+  {
+    FrameStore *fs = p_Dpb->fs[i];
+
+    printf("(");
+    printf("fn=%d ", fs->frame_num);
+    // bit 0 of is_used: top field present; fall back to the frame's top_poc
+    if (fs->is_used & 1)
+    {
+      if (fs->top_field)
+        printf("T: poc=%d ", fs->top_field->poc);
+      else
+        printf("T: poc=%d ", fs->frame->top_poc);
+    }
+    // bit 1 of is_used: bottom field present; fall back to the frame's bottom_poc
+    if (fs->is_used & 2)
+    {
+      if (fs->bottom_field)
+        printf("B: poc=%d ", fs->bottom_field->poc);
+      else
+        printf("B: poc=%d ", fs->frame->bottom_poc);
+    }
+    if (fs->is_used == 3)
+      printf("F: poc=%d ", fs->frame->poc);
+    printf("G: poc=%d) ", fs->poc);
+    if (fs->is_reference) printf ("ref (%d) ", fs->is_reference);
+    // report the long-term flags themselves (previously printed is_reference)
+    if (fs->is_long_term) printf ("lt_ref (%d) ", fs->is_long_term);
+    if (fs->is_output) printf ("out ");
+    if (fs->is_used == 3)
+    {
+      if (fs->frame->non_existing) printf ("ne ");
+    }
+    printf ("\n");
+  }
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Returns the size of the dpb depending on level and picture size.
+ *
+ *    A per-level budget is divided by pic_size (picture dimensions in
+ *    macroblock units times 384) and capped at 16. When the VUI carries
+ *    bitstream restrictions, max_dec_frame_buffering (at least 1) is
+ *    returned instead, after checking it does not exceed that value.
+ *
+ * \param active_sps
+ *    sequence parameter set supplying level, picture size and VUI fields
+ *
+ * \return
+ *    DPB size in frames
+ ************************************************************************
+ */
+static int getDpbSize(seq_parameter_set_rbsp_t *active_sps)
+{
+  int size = 0;
+  int pic_size = (active_sps->pic_width_in_mbs_minus1 + 1)
+               * (active_sps->pic_height_in_map_units_minus1 + 1)
+               * (active_sps->frame_mbs_only_flag?1:2)
+               * 384;
+
+  // levels sharing the same budget share one branch
+  switch (active_sps->level_idc)
+  {
+  case 9:
+  case 10:
+    size = 152064;
+    break;
+  case 11:
+    size = (!IS_FREXT_PROFILE(active_sps->profile_idc) && (active_sps->constrained_set3_flag == 1)) ? 152064 : 345600;
+    break;
+  case 12:
+  case 13:
+  case 20:
+    size = 912384;
+    break;
+  case 21:
+    size = 1824768;
+    break;
+  case 22:
+  case 30:
+    size = 3110400;
+    break;
+  case 31:
+    size = 6912000;
+    break;
+  case 32:
+    size = 7864320;
+    break;
+  case 40:
+  case 41:
+    size = 12582912;
+    break;
+  case 42:
+    size = 13369344;
+    break;
+  case 50:
+    size = 42393600;
+    break;
+  case 51:
+    size = 70778880;
+    break;
+  default:
+    error ("undefined level", 500);
+    break;
+  }
+
+  size = imin( size / pic_size, 16);
+
+  if (active_sps->vui_parameters_present_flag && active_sps->vui_seq_parameters.bitstream_restriction_flag)
+  {
+    if ((int)active_sps->vui_seq_parameters.max_dec_frame_buffering > size)
+      error ("max_dec_frame_buffering larger than MaxDpbSize", 500);
+
+    size = imax (1, active_sps->vui_seq_parameters.max_dec_frame_buffering);
+  }
+
+  return size;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Check the number of frames marked "used for reference" (short term
+ *    plus long term) and report an error if it exceeds
+ *    max(1, num_ref_frames).
+ *
+ ************************************************************************
+ */
+void check_num_ref(DecodedPictureBuffer *p_Dpb)
+{
+  // within the limit: nothing to report
+  if ((int)(p_Dpb->ltref_frames_in_buffer + p_Dpb->ref_frames_in_buffer ) <= (imax(1, p_Dpb->num_ref_frames)))
+    return;
+
+  error ("Max. number of reference frames exceeded. Invalid stream.", 500);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for decoded picture buffer and initialize with sane values.
+ *    A buffer that is already initialised is released with free_dpb() first.
+ *
+ * \param p_Vid
+ *    video parameters; supplies the DPB, the active SPS and the listX arrays
+ ************************************************************************
+ */
+void init_dpb(VideoParameters *p_Vid)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+  size_t num_output_pictures;
+  unsigned slot, list_no;
+
+  if (p_Dpb->init_done)
+    free_dpb(p_Vid);
+
+  p_Dpb->p_Vid = p_Vid;
+  p_Dpb->size = getDpbSize(active_sps);
+  p_Dpb->num_ref_frames = active_sps->num_ref_frames;
+
+  if (p_Dpb->size < active_sps->num_ref_frames)
+    error ("DPB size at specified level is smaller than the specified number of reference frames. This is not allowed.\n", 1000);
+
+  // bookkeeping starts out empty
+  p_Dpb->used_size = 0;
+  p_Dpb->last_picture = NULL;
+  p_Dpb->ref_frames_in_buffer = 0;
+  p_Dpb->ltref_frames_in_buffer = 0;
+
+  // three parallel pointer arrays, each with p_Dpb->size entries
+  p_Dpb->fs = calloc(p_Dpb->size, sizeof (FrameStore*));
+  if (NULL==p_Dpb->fs)
+    no_mem_exit("init_dpb: dpb->fs");
+
+  p_Dpb->fs_ref = calloc(p_Dpb->size, sizeof (FrameStore*));
+  if (NULL==p_Dpb->fs_ref)
+    no_mem_exit("init_dpb: dpb->fs_ref");
+
+  p_Dpb->fs_ltref = calloc(p_Dpb->size, sizeof (FrameStore*));
+  if (NULL==p_Dpb->fs_ltref)
+    no_mem_exit("init_dpb: dpb->fs_ltref");
+
+  // only fs[] owns frame stores; the reference arrays start out empty
+  for (slot = 0; slot < p_Dpb->size; slot++)
+  {
+    p_Dpb->fs[slot] = alloc_frame_store();
+    p_Dpb->fs_ref[slot] = NULL;
+    p_Dpb->fs_ltref[slot] = NULL;
+  }
+
+  for (list_no = 0; list_no < 6; list_no++)
+  {
+    p_Vid->listX[list_no] = calloc(MAX_LIST_SIZE, sizeof (StorablePicture*)); // +1 for reordering
+    if (NULL==p_Vid->listX[list_no])
+      no_mem_exit("init_dpb: p_Vid->listX[i]");
+  }
+
+  /* allocate a dummy storable picture whose frame/field links point at itself */
+  p_Vid->no_reference_picture = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr);
+  p_Vid->no_reference_picture->top_field    = p_Vid->no_reference_picture;
+  p_Vid->no_reference_picture->bottom_field = p_Vid->no_reference_picture;
+  p_Vid->no_reference_picture->frame        = p_Vid->no_reference_picture;
+
+  // every reference list is empty to begin with
+  for (list_no = 0; list_no < 6; list_no++)
+  {
+    for (slot = 0; slot < MAX_LIST_SIZE; slot++)
+      p_Vid->listX[list_no][slot] = NULL;
+
+    p_Vid->listXsize[list_no]=0;
+  }
+
+  p_Dpb->last_output_poc = INT_MIN;
+  p_Vid->last_has_mmco_5 = 0;
+  p_Dpb->init_done = 1;
+
+  // output picture pool: DPB size plus the VUI max_dec_frame_buffering field
+  num_output_pictures = getDpbSize(active_sps) + active_sps->vui_seq_parameters.max_dec_frame_buffering;
+  out_storable_pictures_init(p_Vid, num_output_pictures);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Free memory for decoded picture buffer.
+ *
+ *    Every pointer released here is reset to NULL afterwards, so calling
+ *    this function again before init_dpb() does not free anything twice.
+ *
+ * \param p_Vid
+ *    video parameters owning the DPB, the listX arrays and the dummy
+ *    no_reference_picture
+ ************************************************************************
+ */
+void free_dpb(VideoParameters *p_Vid)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  unsigned i;
+
+  if (p_Dpb->fs)
+  {
+    for (i=0; i<p_Dpb->size; i++)
+    {
+      free_frame_store(p_Vid, p_Dpb->fs[i]);
+    }
+    free (p_Dpb->fs);
+    p_Dpb->fs=NULL;
+  }
+
+  // fs_ref / fs_ltref only alias frame stores owned by fs[]; release the
+  // arrays themselves and clear the pointers to avoid a later double free
+  if (p_Dpb->fs_ref)
+  {
+    free (p_Dpb->fs_ref);
+    p_Dpb->fs_ref = NULL;
+  }
+  if (p_Dpb->fs_ltref)
+  {
+    free (p_Dpb->fs_ltref);
+    p_Dpb->fs_ltref = NULL;
+  }
+  p_Dpb->last_output_poc = INT_MIN;
+
+  for (i=0; i<6; i++)
+  {
+    if (p_Vid->listX[i])
+    {
+      free (p_Vid->listX[i]);
+      p_Vid->listX[i] = NULL;
+    }
+  }
+
+  p_Dpb->init_done = 0;
+
+  // release the dummy picture once and forget it
+  if (p_Vid->no_reference_picture)
+  {
+    free_storable_picture(p_Vid, p_Vid->no_reference_picture);
+    p_Vid->no_reference_picture = NULL;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for a decoded picture buffer frame store and
+ *    initialize it with sane values.
+ *
+ * \return
+ *    the allocated FrameStore structure
+ ************************************************************************
+ */
+FrameStore* alloc_frame_store(void)
+{
+  FrameStore *store = calloc (1, sizeof(FrameStore));
+
+  if (NULL==store)
+    no_mem_exit("alloc_frame_store: f");
+
+  // no picture attached yet
+  store->frame        = NULL;
+  store->top_field    = NULL;
+  store->bottom_field = NULL;
+
+  // all usage / reference / output flags cleared
+  store->is_used           = 0;
+  store->is_reference      = 0;
+  store->is_long_term      = 0;
+  store->is_orig_reference = 0;
+  store->is_output         = 0;
+
+  return store;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free frame store memory, including any frame / field pictures
+ *    still attached to it. A NULL frame store is ignored.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param f
+ *    FrameStore to be freed
+ *
+ ************************************************************************
+ */
+void free_frame_store(VideoParameters *p_Vid, FrameStore* f)
+{
+  if (!f)
+    return;
+
+  if (f->frame)
+  {
+    free_storable_picture(p_Vid, f->frame);
+    f->frame = NULL;
+  }
+
+  if (f->top_field)
+  {
+    free_storable_picture(p_Vid, f->top_field);
+    f->top_field = NULL;
+  }
+
+  if (f->bottom_field)
+  {
+    free_storable_picture(p_Vid, f->bottom_field);
+    f->bottom_field = NULL;
+  }
+
+  free(f);
+}
+
+void free_pic_motion(VideoParameters *p_Vid, PicMotionParams *motion, int size_x, int size_y);
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mark FrameStore unused for reference and release the motion data
+ *    of every picture attached to it
+ *
+ * \param p_Vid
+ *    video parameters, passed through to free_pic_motion()
+ * \param fs
+ *    frame store to unmark
+ ************************************************************************
+ */
+static void unmark_for_reference(VideoParameters *p_Vid, FrameStore* fs)
+{
+  // bit 0 of is_used: top field, bit 1: bottom field
+  if ((fs->is_used & 1) && fs->top_field)
+    fs->top_field->used_for_reference = 0;
+
+  if ((fs->is_used & 2) && fs->bottom_field)
+    fs->bottom_field->used_for_reference = 0;
+
+  // both bits set: the fields were cleared above, the frame is left
+  if (fs->is_used == 3)
+    fs->frame->used_for_reference = 0;
+
+  fs->is_reference = 0;
+
+  if (fs->frame)
+    free_pic_motion(p_Vid, &fs->frame->motion, fs->frame->size_x, fs->frame->size_y);
+
+  if (fs->top_field)
+    free_pic_motion(p_Vid, &fs->top_field->motion, fs->top_field->size_x, fs->top_field->size_y);
+
+  if (fs->bottom_field)
+    free_pic_motion(p_Vid, &fs->bottom_field->motion, fs->bottom_field->size_x, fs->bottom_field->size_y);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mark FrameStore unused for reference and reset long term flags
+ *
+ * \param fs
+ *    frame store to unmark
+ ************************************************************************
+ */
+static void unmark_for_long_term_reference(FrameStore* fs)
+{
+  // bit 0 of is_used: top field, bit 1: bottom field
+  if ((fs->is_used & 1) && fs->top_field)
+  {
+    fs->top_field->used_for_reference = 0;
+    fs->top_field->is_long_term = 0;
+  }
+
+  if ((fs->is_used & 2) && fs->bottom_field)
+  {
+    fs->bottom_field->used_for_reference = 0;
+    fs->bottom_field->is_long_term = 0;
+  }
+
+  // both bits set: the fields were cleared above, the frame is left
+  if (fs->is_used == 3)
+  {
+    fs->frame->used_for_reference = 0;
+    fs->frame->is_long_term = 0;
+  }
+
+  fs->is_reference = 0;
+  fs->is_long_term = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders stored pictures by pic_num, descending
+ *
+ * \return
+ *    1 if the first pic_num is smaller, -1 if it is larger, 0 if equal
+ ************************************************************************
+ */
+static inline int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(StorablePicture *const *)arg1)->pic_num;
+  const int rhs = (*(StorablePicture *const *)arg2)->pic_num;
+
+  // larger value sorts first
+  return (lhs < rhs) - (lhs > rhs);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders stored pictures by long_term_pic_num,
+ *    ascending
+ *
+ * \return
+ *    -1 if the first long_term_pic_num is smaller, 1 if it is larger,
+ *    0 if equal
+ ************************************************************************
+ */
+static inline int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(StorablePicture *const *)arg1)->long_term_pic_num;
+  const int rhs = (*(StorablePicture *const *)arg2)->long_term_pic_num;
+
+  // smaller value sorts first
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders frame stores by frame_num_wrap, descending
+ *
+ * \return
+ *    1 if the first frame_num_wrap is smaller, -1 if it is larger,
+ *    0 if equal
+ ************************************************************************
+ */
+static inline int compare_fs_by_frame_num_desc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(FrameStore *const *)arg1)->frame_num_wrap;
+  const int rhs = (*(FrameStore *const *)arg2)->frame_num_wrap;
+
+  // larger value sorts first
+  return (lhs < rhs) - (lhs > rhs);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders frame stores by long_term_frame_idx,
+ *    ascending
+ *
+ * \return
+ *    -1 if the first long_term_frame_idx is smaller, 1 if it is larger,
+ *    0 if equal
+ ************************************************************************
+ */
+static inline int compare_fs_by_lt_pic_idx_asc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(FrameStore *const *)arg1)->long_term_frame_idx;
+  const int rhs = (*(FrameStore *const *)arg2)->long_term_frame_idx;
+
+  // smaller value sorts first
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders stored pictures by poc, ascending
+ *
+ * \return
+ *    -1 if the first poc is smaller, 1 if it is larger, 0 if equal
+ ************************************************************************
+ */
+static inline int compare_pic_by_poc_asc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(StorablePicture *const *)arg1)->poc;
+  const int rhs = (*(StorablePicture *const *)arg2)->poc;
+
+  // smaller value sorts first
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders stored pictures by poc, descending
+ *
+ * \return
+ *    1 if the first poc is smaller, -1 if it is larger, 0 if equal
+ ************************************************************************
+ */
+static inline int compare_pic_by_poc_desc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(StorablePicture *const *)arg1)->poc;
+  const int rhs = (*(StorablePicture *const *)arg2)->poc;
+
+  // larger value sorts first
+  return (lhs < rhs) - (lhs > rhs);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders frame stores by poc, ascending
+ *
+ * \return
+ *    -1 if the first poc is smaller, 1 if it is larger, 0 if equal
+ ************************************************************************
+ */
+static inline int compare_fs_by_poc_asc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(FrameStore *const *)arg1)->poc;
+  const int rhs = (*(FrameStore *const *)arg2)->poc;
+
+  // smaller value sorts first
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    qsort comparator: orders frame stores by poc, descending
+ *
+ * \return
+ *    1 if the first poc is smaller, -1 if it is larger, 0 if equal
+ ************************************************************************
+ */
+static inline int compare_fs_by_poc_desc( const void *arg1, const void *arg2 )
+{
+  const int lhs = (*(FrameStore *const *)arg1)->poc;
+  const int rhs = (*(FrameStore *const *)arg2)->poc;
+
+  // larger value sorts first
+  return (lhs < rhs) - (lhs > rhs);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    returns true, if picture is short term reference picture, i.e. it
+ *    is used for reference and not flagged long term
+ *
+ ************************************************************************
+ */
+int is_short_ref(StorablePicture *s)
+{
+  if (!s->used_for_reference)
+    return 0;
+
+  return !(s->is_long_term);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    returns true, if picture is long term reference picture, i.e. it
+ *    is used for reference and flagged long term
+ *
+ ************************************************************************
+ */
+int is_long_ref(StorablePicture *s)
+{
+  if (!s->used_for_reference)
+    return 0;
+
+  return (s->is_long_term) ? 1 : 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Scan fs_list from *cursor for the next frame store whose field of
+ *    the requested parity is present (is_used bit) and accepted by
+ *    is_ref; append that field to list and stop. *cursor is left one
+ *    past the last entry examined.
+ ************************************************************************
+ */
+static void append_next_ref_field(FrameStore **fs_list, int list_idx, int *cursor, int want_top,
+                                  int (*is_ref)(StorablePicture *s),
+                                  StorablePicture **list, char *list_size)
+{
+  while (*cursor < list_idx)
+  {
+    FrameStore *fs = fs_list[*cursor];
+    StorablePicture *field = want_top ? fs->top_field : fs->bottom_field;
+    int parity_bit = want_top ? 1 : 2;
+
+    (*cursor)++;
+
+    if ((fs->is_used & parity_bit) && is_ref(field))
+    {
+      list[(short) *list_size] = field;
+      (*list_size)++;
+      return;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generates an alternating field list from a given FrameStore list.
+ *    Fields are appended to list starting at index *list_size, beginning
+ *    with the parity of the current picture and then alternating; once
+ *    one parity is exhausted the remaining fields of the other follow.
+ *    Nothing is appended when currStructure is neither TOP_FIELD nor
+ *    BOTTOM_FIELD.
+ *
+ * \param currStructure
+ *    structure of the current picture
+ * \param fs_list
+ *    frame stores to take fields from
+ * \param list_idx
+ *    number of entries in fs_list
+ * \param list
+ *    output picture list
+ * \param list_size
+ *    in/out: number of entries in list
+ * \param long_term
+ *    non-zero selects long term reference fields, zero short term ones
+ ************************************************************************
+ */
+static void gen_pic_list_from_frame_list(PictureStructure currStructure, FrameStore **fs_list, int list_idx, StorablePicture **list, char *list_size, int long_term)
+{
+  int top_idx = 0;
+  int bot_idx = 0;
+  int top_first;
+  int (*is_ref)(StorablePicture *s) = long_term ? is_long_ref : is_short_ref;
+
+  if ((currStructure != TOP_FIELD) && (currStructure != BOTTOM_FIELD))
+    return;
+
+  top_first = (currStructure == TOP_FIELD);
+
+  while ((top_idx<list_idx)||(bot_idx<list_idx))
+  {
+    if (top_first)
+    {
+      append_next_ref_field(fs_list, list_idx, &top_idx, 1, is_ref, list, list_size);
+      append_next_ref_field(fs_list, list_idx, &bot_idx, 0, is_ref, list, list_size);
+    }
+    else
+    {
+      append_next_ref_field(fs_list, list_idx, &bot_idx, 0, is_ref, list, list_size);
+      append_next_ref_field(fs_list, list_idx, &top_idx, 1, is_ref, list, list_size);
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * Initialize p_Vid->listX[0] and list 1 depending on current slice type
+ *
+ ************************************************************************
+ */
+void init_lists(Slice *currSlice)
+{
+ VideoParameters *p_Vid = currSlice->p_Vid;
+ DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ int add_top = 0, add_bottom = 0;
+ unsigned i;
+ int j;
+ int MaxFrameNum = 1 << (active_sps->log2_max_frame_num_minus4 + 4);
+ int diff;
+
+ int list0idx = 0;
+ int list0idx_1 = 0;
+ int listltidx = 0;
+
+ FrameStore **fs_list0;
+ FrameStore **fs_list1;
+ FrameStore **fs_listlt;
+
+ StorablePicture *tmp_s;
+
+ if (currSlice->structure == FRAME)
+ {
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_used==3)
+ {
+ if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term))
+ {
+ if( p_Dpb->fs_ref[i]->frame_num > p_Vid->frame_num )
+ {
+ p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num - MaxFrameNum;
+ }
+ else
+ {
+ p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num;
+ }
+ p_Dpb->fs_ref[i]->frame->pic_num = p_Dpb->fs_ref[i]->frame_num_wrap;
+ }
+ }
+ }
+ // update long_term_pic_num
+ for (i = 0; i < p_Dpb->ltref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ltref[i]->is_used==3)
+ {
+ if (p_Dpb->fs_ltref[i]->frame->is_long_term)
+ {
+ p_Dpb->fs_ltref[i]->frame->long_term_pic_num = p_Dpb->fs_ltref[i]->frame->long_term_frame_idx;
+ }
+ }
+ }
+ }
+ else
+ {
+ if (currSlice->structure == TOP_FIELD)
+ {
+ add_top = 1;
+ add_bottom = 0;
+ }
+ else
+ {
+ add_top = 0;
+ add_bottom = 1;
+ }
+
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_reference)
+ {
+ if( p_Dpb->fs_ref[i]->frame_num > p_Vid->frame_num )
+ {
+ p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num - MaxFrameNum;
+ }
+ else
+ {
+ p_Dpb->fs_ref[i]->frame_num_wrap = p_Dpb->fs_ref[i]->frame_num;
+ }
+ if (p_Dpb->fs_ref[i]->is_reference & 1)
+ {
+ p_Dpb->fs_ref[i]->top_field->pic_num = (2 * p_Dpb->fs_ref[i]->frame_num_wrap) + add_top;
+ }
+ if (p_Dpb->fs_ref[i]->is_reference & 2)
+ {
+ p_Dpb->fs_ref[i]->bottom_field->pic_num = (2 * p_Dpb->fs_ref[i]->frame_num_wrap) + add_bottom;
+ }
+ }
+ }
+ // update long_term_pic_num
+ for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ltref[i]->is_long_term & 1)
+ {
+ p_Dpb->fs_ltref[i]->top_field->long_term_pic_num = 2 * p_Dpb->fs_ltref[i]->top_field->long_term_frame_idx + add_top;
+ }
+ if (p_Dpb->fs_ltref[i]->is_long_term & 2)
+ {
+ p_Dpb->fs_ltref[i]->bottom_field->long_term_pic_num = 2 * p_Dpb->fs_ltref[i]->bottom_field->long_term_frame_idx + add_bottom;
+ }
+ }
+ }
+
+ if ((currSlice->slice_type == I_SLICE)||(currSlice->slice_type == SI_SLICE))
+ {
+ p_Vid->listXsize[0] = 0;
+ p_Vid->listXsize[1] = 0;
+ return;
+ }
+
+ if ((currSlice->slice_type == P_SLICE)||(currSlice->slice_type == SP_SLICE))
+ {
+ // Calculate FrameNumWrap and PicNum
+ if (currSlice->structure == FRAME)
+ {
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_used==3)
+ {
+ if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term))
+ {
+ p_Vid->listX[0][list0idx++] = p_Dpb->fs_ref[i]->frame;
+ }
+ }
+ }
+ // order list 0 by PicNum
+ qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc);
+ p_Vid->listXsize[0] = (char) list0idx;
+// printf("listX[0] (PicNum): "); for (i=0; i<list0idx; i++){printf ("%d ", p_Vid->listX[0][i]->pic_num);} printf("\n");
+
+ // long term handling
+ for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ltref[i]->is_used==3)
+ {
+ if (p_Dpb->fs_ltref[i]->frame->is_long_term)
+ {
+ p_Vid->listX[0][list0idx++]=p_Dpb->fs_ltref[i]->frame;
+ }
+ }
+ }
+ qsort((void *)&p_Vid->listX[0][(short) p_Vid->listXsize[0]], list0idx - p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+ p_Vid->listXsize[0] = (char) list0idx;
+ }
+ else
+ {
+ fs_list0 = calloc(p_Dpb->size, sizeof (FrameStore*));
+ if (NULL==fs_list0)
+ no_mem_exit("init_lists: fs_list0");
+ fs_listlt = calloc(p_Dpb->size, sizeof (FrameStore*));
+ if (NULL==fs_listlt)
+ no_mem_exit("init_lists: fs_listlt");
+
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_reference)
+ {
+ fs_list0[list0idx++] = p_Dpb->fs_ref[i];
+ }
+ }
+
+ qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_frame_num_desc);
+
+// printf("fs_list0 (FrameNum): "); for (i=0; i<list0idx; i++){printf ("%d ", fs_list0[i]->frame_num_wrap);} printf("\n");
+
+ p_Vid->listXsize[0] = 0;
+ gen_pic_list_from_frame_list(currSlice->structure, fs_list0, list0idx, p_Vid->listX[0], &p_Vid->listXsize[0], 0);
+
+// printf("p_Vid->listX[0] (PicNum): "); for (i=0; i<p_Vid->listXsize[0]; i++){printf ("%d ", p_Vid->listX[0][i]->pic_num);} printf("\n");
+
+ // long term handling
+ for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++)
+ {
+ fs_listlt[listltidx++]=p_Dpb->fs_ltref[i];
+ }
+
+ qsort((void *)fs_listlt, listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc);
+
+ gen_pic_list_from_frame_list(currSlice->structure, fs_listlt, listltidx, p_Vid->listX[0], &p_Vid->listXsize[0], 1);
+
+ free(fs_list0);
+ free(fs_listlt);
+ }
+ p_Vid->listXsize[1] = 0;
+ }
+ else
+ {
+ // B-Slice
+ if (currSlice->structure == FRAME)
+ {
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_used==3)
+ {
+ if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term))
+ {
+ if (p_Vid->framepoc >= p_Dpb->fs_ref[i]->frame->poc) //!KS use >= for error concealment
+// if (p_Vid->framepoc > p_Dpb->fs_ref[i]->frame->poc)
+ {
+ p_Vid->listX[0][list0idx++] = p_Dpb->fs_ref[i]->frame;
+ }
+ }
+ }
+ }
+ qsort((void *)p_Vid->listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_poc_desc);
+ list0idx_1 = list0idx;
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_used==3)
+ {
+ if ((p_Dpb->fs_ref[i]->frame->used_for_reference)&&(!p_Dpb->fs_ref[i]->frame->is_long_term))
+ {
+ if (p_Vid->framepoc < p_Dpb->fs_ref[i]->frame->poc)
+ {
+ p_Vid->listX[0][list0idx++] = p_Dpb->fs_ref[i]->frame;
+ }
+ }
+ }
+ }
+ qsort((void *)&p_Vid->listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc);
+
+ for (j=0; j<list0idx_1; j++)
+ {
+ p_Vid->listX[1][list0idx-list0idx_1+j]=p_Vid->listX[0][j];
+ }
+ for (j=list0idx_1; j<list0idx; j++)
+ {
+ p_Vid->listX[1][j-list0idx_1]=p_Vid->listX[0][j];
+ }
+
+ p_Vid->listXsize[0] = p_Vid->listXsize[1] = (char) list0idx;
+
+// printf("p_Vid->listX[0] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[0]; i++){printf ("%d ", p_Vid->listX[0][i]->poc);} printf("\n");
+// printf("p_Vid->listX[1] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[1]; i++){printf ("%d ", p_Vid->listX[1][i]->poc);} printf("\n");
+
+ // long term handling
+ for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ltref[i]->is_used==3)
+ {
+ if (p_Dpb->fs_ltref[i]->frame->is_long_term)
+ {
+ p_Vid->listX[0][list0idx] =p_Dpb->fs_ltref[i]->frame;
+ p_Vid->listX[1][list0idx++]=p_Dpb->fs_ltref[i]->frame;
+ }
+ }
+ }
+ qsort((void *)&p_Vid->listX[0][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+ qsort((void *)&p_Vid->listX[1][(short) p_Vid->listXsize[0]], list0idx-p_Vid->listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+ p_Vid->listXsize[0] = p_Vid->listXsize[1] = (char) list0idx;
+ }
+ else
+ {
+ fs_list0 = calloc(p_Dpb->size, sizeof (FrameStore*));
+ if (NULL==fs_list0)
+ no_mem_exit("init_lists: fs_list0");
+ fs_list1 = calloc(p_Dpb->size, sizeof (FrameStore*));
+ if (NULL==fs_list1)
+ no_mem_exit("init_lists: fs_list1");
+ fs_listlt = calloc(p_Dpb->size, sizeof (FrameStore*));
+ if (NULL==fs_listlt)
+ no_mem_exit("init_lists: fs_listlt");
+
+ p_Vid->listXsize[0] = 0;
+ p_Vid->listXsize[1] = 1;
+
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_used)
+ {
+ if (p_Vid->ThisPOC >= p_Dpb->fs_ref[i]->poc)
+ {
+ fs_list0[list0idx++] = p_Dpb->fs_ref[i];
+ }
+ }
+ }
+ qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_poc_desc);
+ list0idx_1 = list0idx;
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->is_used)
+ {
+ if (p_Vid->ThisPOC < p_Dpb->fs_ref[i]->poc)
+ {
+ fs_list0[list0idx++] = p_Dpb->fs_ref[i];
+ }
+ }
+ }
+ qsort((void *)&fs_list0[list0idx_1], list0idx-list0idx_1, sizeof(FrameStore*), compare_fs_by_poc_asc);
+
+ for (j=0; j<list0idx_1; j++)
+ {
+ fs_list1[list0idx-list0idx_1+j]=fs_list0[j];
+ }
+ for (j=list0idx_1; j<list0idx; j++)
+ {
+ fs_list1[j-list0idx_1]=fs_list0[j];
+ }
+
+// printf("fs_list0 currPoc=%d (Poc): ", p_Vid->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d ", fs_list0[i]->poc);} printf("\n");
+// printf("fs_list1 currPoc=%d (Poc): ", p_Vid->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d ", fs_list1[i]->poc);} printf("\n");
+
+ p_Vid->listXsize[0] = 0;
+ p_Vid->listXsize[1] = 0;
+ gen_pic_list_from_frame_list(currSlice->structure, fs_list0, list0idx, p_Vid->listX[0], &p_Vid->listXsize[0], 0);
+ gen_pic_list_from_frame_list(currSlice->structure, fs_list1, list0idx, p_Vid->listX[1], &p_Vid->listXsize[1], 0);
+
+// printf("p_Vid->listX[0] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[0]; i++){printf ("%d ", p_Vid->listX[0][i]->poc);} printf("\n");
+// printf("p_Vid->listX[1] currPoc=%d (Poc): ", p_Vid->framepoc); for (i=0; i<p_Vid->listXsize[1]; i++){printf ("%d ", p_Vid->listX[1][i]->poc);} printf("\n");
+
+ // long term handling
+ for (i=0; i<p_Dpb->ltref_frames_in_buffer; i++)
+ {
+ fs_listlt[listltidx++]=p_Dpb->fs_ltref[i];
+ }
+
+ qsort((void *)fs_listlt, listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc);
+
+ gen_pic_list_from_frame_list(currSlice->structure, fs_listlt, listltidx, p_Vid->listX[0], &p_Vid->listXsize[0], 1);
+ gen_pic_list_from_frame_list(currSlice->structure, fs_listlt, listltidx, p_Vid->listX[1], &p_Vid->listXsize[1], 1);
+
+ free(fs_list0);
+ free(fs_list1);
+ free(fs_listlt);
+ }
+ }
+
+ if ((p_Vid->listXsize[0] == p_Vid->listXsize[1]) && (p_Vid->listXsize[0] > 1))
+ {
+ // check if lists are identical, if yes swap first two elements of p_Vid->listX[1]
+ diff=0;
+ for (j = 0; j< p_Vid->listXsize[0]; j++)
+ {
+ if (p_Vid->listX[0][j]!=p_Vid->listX[1][j])
+ diff=1;
+ }
+ if (!diff)
+ {
+ tmp_s = p_Vid->listX[1][0];
+ p_Vid->listX[1][0]=p_Vid->listX[1][1];
+ p_Vid->listX[1][1]=tmp_s;
+ }
+ }
+ // set max size
+ p_Vid->listXsize[0] = (char) imin (p_Vid->listXsize[0], currSlice->num_ref_idx_l0_active);
+ p_Vid->listXsize[1] = (char) imin (p_Vid->listXsize[1], currSlice->num_ref_idx_l1_active);
+
+ // set the unused list entries to NULL
+ for (i=p_Vid->listXsize[0]; i< (MAX_LIST_SIZE) ; i++)
+ {
+ p_Vid->listX[0][i] = p_Vid->no_reference_picture;
+
+ }
+ for (i=p_Vid->listXsize[1]; i< (MAX_LIST_SIZE) ; i++)
+ {
+ p_Vid->listX[1][i] = p_Vid->no_reference_picture;
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Derive the MBAFF field lists listX[2..5] from the frame lists
+ *    listX[0] and listX[1].
+ *
+ *    Each frame entry is expanded into two field entries:
+ *      listX[2] / listX[3]: list0 / list1 for current_field==top
+ *                           (top field first, then bottom field)
+ *      listX[4] / listX[5]: list0 / list1 for current_field==bottom
+ *                           (bottom field first, then top field)
+ *
+ * \param p_Vid
+ *    decoder state providing listX, listXsize and no_reference_picture
+ ************************************************************************
+ */
+void init_mbaff_lists(VideoParameters *p_Vid)
+{
+  unsigned slot;
+  int list, src, n;
+
+  // reset all four derived lists to "empty, filled with the placeholder picture"
+  for (list = 2; list < 6; list++)
+  {
+    for (slot = 0; slot < MAX_LIST_SIZE; slot++)
+    {
+      p_Vid->listX[list][slot] = p_Vid->no_reference_picture;
+    }
+    p_Vid->listXsize[list] = 0;
+  }
+
+  // src==0 expands list0 into listX[2]/listX[4], src==1 expands list1 into listX[3]/listX[5]
+  for (src = 0; src < 2; src++)
+  {
+    const int for_top    = 2 + src;
+    const int for_bottom = 4 + src;
+
+    for (n = 0; n < p_Vid->listXsize[src]; n++)
+    {
+      StorablePicture *frm = p_Vid->listX[src][n];
+
+      p_Vid->listX[for_top][2*n    ]    = frm->top_field;
+      p_Vid->listX[for_top][2*n + 1]    = frm->bottom_field;
+      p_Vid->listX[for_bottom][2*n    ] = frm->bottom_field;
+      p_Vid->listX[for_bottom][2*n + 1] = frm->top_field;
+    }
+    p_Vid->listXsize[for_top] = p_Vid->listXsize[for_bottom] = p_Vid->listXsize[src] * 2;
+  }
+}
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    Look up a short-term reference picture by its picNum.
+ *
+ *    When p_Vid->structure is FRAME only stores with is_reference == 3
+ *    are examined; otherwise the top field (is_reference bit 1) and the
+ *    bottom field (is_reference bit 2) of each store are checked in turn.
+ *
+ * \param p_Vid
+ *    decoder state (DPB, current structure, placeholder picture)
+ * \param picNum
+ *    picture number to search for
+ * \return
+ *    the matching picture, or p_Vid->no_reference_picture if none matches
+ ************************************************************************
+ */
+static StorablePicture* get_short_term_pic(VideoParameters *p_Vid, int picNum)
+{
+  DecodedPictureBuffer *dpb = p_Vid->p_Dpb;
+  unsigned n;
+
+  for (n = 0; n < dpb->ref_frames_in_buffer; n++)
+  {
+    FrameStore *fs = dpb->fs_ref[n];
+
+    if (p_Vid->structure == FRAME)
+    {
+      if ((fs->is_reference == 3) && (!fs->frame->is_long_term) && (fs->frame->pic_num == picNum))
+      {
+        return fs->frame;
+      }
+      continue;
+    }
+
+    // field decoding: test each parity separately
+    if ((fs->is_reference & 1) && (!fs->top_field->is_long_term) && (fs->top_field->pic_num == picNum))
+    {
+      return fs->top_field;
+    }
+    if ((fs->is_reference & 2) && (!fs->bottom_field->is_long_term) && (fs->bottom_field->pic_num == picNum))
+    {
+      return fs->bottom_field;
+    }
+  }
+
+  return p_Vid->no_reference_picture;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Look up a long-term reference picture by its LongtermPicNum.
+ *
+ *    When p_Vid->structure is FRAME only stores with is_reference == 3
+ *    are examined; otherwise the top field (is_reference bit 1) and the
+ *    bottom field (is_reference bit 2) of each store are checked in turn.
+ *
+ * \param p_Vid
+ *    decoder state (DPB and current structure)
+ * \param LongtermPicNum
+ *    long-term picture number to search for
+ * \return
+ *    the matching picture, or NULL if none matches (note: unlike the
+ *    short-term lookup, no placeholder picture is substituted)
+ ************************************************************************
+ */
+static StorablePicture* get_long_term_pic(VideoParameters *p_Vid, int LongtermPicNum)
+{
+  DecodedPictureBuffer *dpb = p_Vid->p_Dpb;
+  unsigned n;
+
+  for (n = 0; n < dpb->ltref_frames_in_buffer; n++)
+  {
+    FrameStore *fs = dpb->fs_ltref[n];
+
+    if (p_Vid->structure == FRAME)
+    {
+      if ((fs->is_reference == 3) && (fs->frame->is_long_term) && (fs->frame->long_term_pic_num == LongtermPicNum))
+      {
+        return fs->frame;
+      }
+      continue;
+    }
+
+    // field decoding: test each parity separately
+    if ((fs->is_reference & 1) && (fs->top_field->is_long_term) && (fs->top_field->long_term_pic_num == LongtermPicNum))
+    {
+      return fs->top_field;
+    }
+    if ((fs->is_reference & 2) && (fs->bottom_field->is_long_term) && (fs->bottom_field->long_term_pic_num == LongtermPicNum))
+    {
+      return fs->bottom_field;
+    }
+  }
+  return NULL;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Move the short-term picture with number picNumLX to position
+ *    *refIdxLX of the list.
+ *
+ *    Entries from *refIdxLX onwards are shifted up by one, the picture is
+ *    inserted, *refIdxLX is advanced, and the remainder is compacted so
+ *    that the later duplicate of the inserted picture (and any NULL
+ *    entry) is dropped.
+ *
+ * \param p_Vid
+ *    decoder state used for the picture lookup
+ * \param RefPicListX
+ *    list to modify; index num_ref_idx_lX_active_minus1+1 is written, so
+ *    the array must hold at least num_ref_idx_lX_active_minus1+2 entries
+ * \param num_ref_idx_lX_active_minus1
+ *    number of active entries minus one
+ * \param picNumLX
+ *    picNum of the short-term picture to move
+ * \param refIdxLX
+ *    in/out: insertion index, incremented by one
+ ************************************************************************
+ */
+static void reorder_short_term(VideoParameters *p_Vid, StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int picNumLX, int *refIdxLX)
+{
+  StorablePicture *wanted = get_short_term_pic(p_Vid, picNumLX);
+  int src, dst;
+
+  // open a gap at *refIdxLX (uses one temporary slot past the active range)
+  src = num_ref_idx_lX_active_minus1 + 1;
+  while (src > *refIdxLX)
+  {
+    RefPicListX[src] = RefPicListX[src - 1];
+    src--;
+  }
+
+  RefPicListX[*refIdxLX] = wanted;
+  (*refIdxLX)++;
+
+  // compact the tail, skipping NULL entries and the old copy of the moved picture
+  dst = *refIdxLX;
+  for (src = *refIdxLX; src <= num_ref_idx_lX_active_minus1 + 1; src++)
+  {
+    StorablePicture *cand = RefPicListX[src];
+
+    if (!cand)
+      continue;
+
+    if (cand->is_long_term || (cand->pic_num != picNumLX))
+    {
+      RefPicListX[dst] = cand;
+      dst++;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Move the long-term picture with number LongTermPicNum to position
+ *    *refIdxLX of the list.
+ *
+ *    Entries from *refIdxLX onwards are shifted up by one, the picture
+ *    (possibly NULL if the lookup fails) is inserted, *refIdxLX is
+ *    advanced, and the remainder is compacted so that the later duplicate
+ *    of the inserted picture (and any NULL entry) is dropped.
+ *
+ * \param p_Vid
+ *    decoder state used for the picture lookup
+ * \param RefPicListX
+ *    list to modify; index num_ref_idx_lX_active_minus1+1 is written, so
+ *    the array must hold at least num_ref_idx_lX_active_minus1+2 entries
+ * \param num_ref_idx_lX_active_minus1
+ *    number of active entries minus one
+ * \param LongTermPicNum
+ *    long-term picture number of the picture to move
+ * \param refIdxLX
+ *    in/out: insertion index, incremented by one
+ ************************************************************************
+ */
+static void reorder_long_term(VideoParameters *p_Vid, StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int LongTermPicNum, int *refIdxLX)
+{
+  StorablePicture *wanted = get_long_term_pic(p_Vid, LongTermPicNum);
+  int src, dst;
+
+  // open a gap at *refIdxLX (uses one temporary slot past the active range)
+  src = num_ref_idx_lX_active_minus1 + 1;
+  while (src > *refIdxLX)
+  {
+    RefPicListX[src] = RefPicListX[src - 1];
+    src--;
+  }
+
+  RefPicListX[*refIdxLX] = wanted;
+  (*refIdxLX)++;
+
+  // compact the tail, skipping NULL entries and the old copy of the moved picture
+  dst = *refIdxLX;
+  for (src = *refIdxLX; src <= num_ref_idx_lX_active_minus1 + 1; src++)
+  {
+    StorablePicture *cand = RefPicListX[src];
+
+    if (!cand)
+      continue;
+
+    if ((!cand->is_long_term) || (cand->long_term_pic_num != LongTermPicNum))
+    {
+      RefPicListX[dst] = cand;
+      dst++;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Apply the reordering commands of a slice to one reference list.
+ *
+ *    Commands are read until an idc of 3 is found:
+ *      idc 0   : short-term, predicted picNum minus (abs_diff + 1)
+ *      idc 1   : short-term, predicted picNum plus  (abs_diff + 1)
+ *      other   : long-term, selected by long_term_pic_idx
+ *    An idc above 3 is reported through error().
+ *
+ * \param p_Vid
+ *    decoder state (structure, MaxFrameNum, frame_num, DPB)
+ * \param list
+ *    reference list to reorder in place
+ * \param list_size
+ *    out: always set to num_ref_idx_lX_active_minus1 + 1
+ * \param num_ref_idx_lX_active_minus1
+ *    number of active entries minus one
+ * \param reordering_of_pic_nums_idc
+ *    command codes, terminated by the value 3
+ * \param abs_diff_pic_num_minus1
+ *    per-command picNum difference (used for idc 0 and 1)
+ * \param long_term_pic_idx
+ *    per-command long-term picture number (used for the long-term case)
+ ************************************************************************
+ */
+void reorder_ref_pic_list(VideoParameters *p_Vid, StorablePicture **list, char *list_size, int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx)
+{
+  int maxPicNum, currPicNum;
+  int picNumPred, picNumNoWrap, picNum;
+  int refIdxLX = 0;
+  int cmd;
+
+  if (p_Vid->structure != FRAME)
+  {
+    maxPicNum  = 2 * p_Vid->MaxFrameNum;
+    currPicNum = 2 * p_Vid->frame_num + 1;
+  }
+  else
+  {
+    maxPicNum  = p_Vid->MaxFrameNum;
+    currPicNum = p_Vid->frame_num;
+  }
+
+  // the prediction starts at the current picture and follows each short-term command
+  picNumPred = currPicNum;
+
+  for (cmd = 0; reordering_of_pic_nums_idc[cmd] != 3; cmd++)
+  {
+    int delta;
+
+    if (reordering_of_pic_nums_idc[cmd] > 3)
+      error ("Invalid remapping_of_pic_nums_idc command", 500);
+
+    if (reordering_of_pic_nums_idc[cmd] >= 2)
+    {
+      // long-term command
+      reorder_long_term(p_Vid, list, num_ref_idx_lX_active_minus1, long_term_pic_idx[cmd], &refIdxLX);
+      continue;
+    }
+
+    // short-term command: step the prediction and wrap it into [0, maxPicNum)
+    delta = abs_diff_pic_num_minus1[cmd] + 1;
+    if (reordering_of_pic_nums_idc[cmd] == 0)
+    {
+      picNumNoWrap = picNumPred - delta;
+      if (picNumNoWrap < 0)
+        picNumNoWrap += maxPicNum;
+    }
+    else
+    {
+      picNumNoWrap = picNumPred + delta;
+      if (picNumNoWrap >= maxPicNum)
+        picNumNoWrap -= maxPicNum;
+    }
+    picNumPred = picNumNoWrap;
+
+    // values above the current picNum refer to the previous wrap-around period
+    picNum = (picNumNoWrap > currPicNum) ? (picNumNoWrap - maxPicNum) : picNumNoWrap;
+
+    reorder_short_term(p_Vid, list, num_ref_idx_lX_active_minus1, picNum, &refIdxLX);
+  }
+
+  // the list size is defined by the active count, independent of the commands
+  *list_size = (char)(num_ref_idx_lX_active_minus1 + 1);
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Rebuild p_Dpb->fs_ref from the frame stores currently in use.
+ *
+ *    Every store in fs[0..used_size) for which is_short_term_reference()
+ *    is true is appended in order; ref_frames_in_buffer is set to the
+ *    resulting count and the remaining slots up to p_Dpb->size are
+ *    cleared to NULL.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer to update
+ ************************************************************************
+ */
+void update_ref_list(DecodedPictureBuffer *p_Dpb)
+{
+  unsigned src;
+  unsigned kept = 0;
+
+  for (src = 0; src < p_Dpb->used_size; src++)
+  {
+    if (!is_short_term_reference(p_Dpb->fs[src]))
+      continue;
+
+    p_Dpb->fs_ref[kept] = p_Dpb->fs[src];
+    kept++;
+  }
+
+  p_Dpb->ref_frames_in_buffer = kept;
+
+  // clear the unused tail of the list
+  for (; kept < p_Dpb->size; kept++)
+  {
+    p_Dpb->fs_ref[kept] = NULL;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Rebuild p_Dpb->fs_ltref from the frame stores currently in use.
+ *
+ *    Every store in fs[0..used_size) for which is_long_term_reference()
+ *    is true is appended in order; ltref_frames_in_buffer is set to the
+ *    resulting count and the remaining slots up to p_Dpb->size are
+ *    cleared to NULL.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer to update
+ ************************************************************************
+ */
+void update_ltref_list(DecodedPictureBuffer *p_Dpb)
+{
+  unsigned src;
+  unsigned kept = 0;
+
+  for (src = 0; src < p_Dpb->used_size; src++)
+  {
+    if (!is_long_term_reference(p_Dpb->fs[src]))
+      continue;
+
+    p_Dpb->fs_ltref[kept] = p_Dpb->fs[src];
+    kept++;
+  }
+
+  p_Dpb->ltref_frames_in_buffer = kept;
+
+  // clear the unused tail of the list
+  for (; kept < p_Dpb->size; kept++)
+  {
+    p_Dpb->fs_ltref[kept] = NULL;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reset the DPB state when an IDR picture is stored.
+ *
+ *    With no_output_of_prior_pics_flag set, every used frame store is
+ *    freed and replaced by a fresh one and the reference lists are
+ *    cleared; otherwise the DPB is flushed via flush_dpb(). Afterwards
+ *    the reference lists are rebuilt, last_output_poc is reset and the
+ *    long-term state of the DPB and of p is initialised from
+ *    p->long_term_reference_flag.
+ *
+ * \param p_Vid
+ *    decoder state owning the DPB
+ * \param p
+ *    the IDR picture (p->idr_flag must be set)
+ ************************************************************************
+ */
+static void idr_memory_management(VideoParameters *p_Vid, StorablePicture* p)
+{
+  DecodedPictureBuffer *dpb = p_Vid->p_Dpb;
+  unsigned n;
+
+  assert (p->idr_flag);
+
+  if (!p->no_output_of_prior_pics_flag)
+  {
+    flush_dpb(p_Vid);
+  }
+  else
+  {
+    // discard all stored pictures without output and start with fresh stores
+    for (n = 0; n < dpb->used_size; n++)
+    {
+      free_frame_store(p_Vid, dpb->fs[n]);
+      dpb->fs[n] = alloc_frame_store();
+    }
+    for (n = 0; n < dpb->ref_frames_in_buffer; n++)
+    {
+      dpb->fs_ref[n] = NULL;
+    }
+    for (n = 0; n < dpb->ltref_frames_in_buffer; n++)
+    {
+      dpb->fs_ltref[n] = NULL;
+    }
+    dpb->used_size = 0;
+  }
+  dpb->last_picture = NULL;
+
+  update_ref_list(dpb);
+  update_ltref_list(dpb);
+  dpb->last_output_poc = INT_MIN;
+
+  if (!p->long_term_reference_flag)
+  {
+    dpb->max_long_term_pic_idx = -1;
+    p->is_long_term = 0;
+  }
+  else
+  {
+    dpb->max_long_term_pic_idx = 0;
+    p->is_long_term = 1;
+    p->long_term_frame_idx = 0;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Sliding window decoded reference picture marking.
+ *
+ *    If the number of short-term reference stores equals num_ref_frames
+ *    minus the number of long-term stores, the first store in fs[] that
+ *    is a reference and not long-term is unmarked and the short-term
+ *    list is rebuilt. The new picture is always marked as not long-term.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer
+ * \param p
+ *    picture about to be stored (must not be an IDR picture)
+ ************************************************************************
+ */
+static void sliding_window_memory_management(DecodedPictureBuffer *p_Dpb, StorablePicture* p)
+{
+  unsigned n;
+
+  assert (!p->idr_flag);
+
+  // window is full: drop the first short-term reference found in the buffer
+  if (p_Dpb->ref_frames_in_buffer==p_Dpb->num_ref_frames - p_Dpb->ltref_frames_in_buffer)
+  {
+    for (n = 0; n < p_Dpb->used_size; n++)
+    {
+      FrameStore *fs = p_Dpb->fs[n];
+
+      if (!fs->is_reference || fs->is_long_term)
+        continue;
+
+      unmark_for_reference(p_Dpb->p_Vid, fs);
+      update_ref_list(p_Dpb);
+      break;
+    }
+  }
+
+  p->is_long_term = 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Calculate picNumX for a memory management command.
+ *
+ * \param p
+ *    current picture; its frame_num is used directly for frames and as
+ *    2 * frame_num + 1 for fields
+ * \param difference_of_pic_nums_minus1
+ *    difference value from the command
+ * \return
+ *    current picNum minus (difference_of_pic_nums_minus1 + 1)
+ ************************************************************************
+ */
+static int get_pic_num_x (StorablePicture *p, int difference_of_pic_nums_minus1)
+{
+  int currPicNum = (p->structure == FRAME) ? p->frame_num : 2 * p->frame_num + 1;
+
+  return currPicNum - (difference_of_pic_nums_minus1 + 1);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Adaptive Memory Management: mark one short-term picture as unused
+ *    for reference.
+ *
+ *    The picture is selected by picNumX derived from p and
+ *    difference_of_pic_nums_minus1. For frames the whole store is
+ *    unmarked; for fields only the matching field is unmarked and, if
+ *    the store holds both fields (is_used == 3), the combined frame is
+ *    unmarked as well. The search stops at the first match.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer
+ * \param p
+ *    current picture
+ * \param difference_of_pic_nums_minus1
+ *    difference value from the command
+ ************************************************************************
+ */
+static void mm_unmark_short_term_for_reference(DecodedPictureBuffer *p_Dpb, StorablePicture *p, int difference_of_pic_nums_minus1)
+{
+  const int picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1);
+  unsigned n;
+
+  for (n = 0; n < p_Dpb->ref_frames_in_buffer; n++)
+  {
+    FrameStore *fs = p_Dpb->fs_ref[n];
+
+    if (p->structure == FRAME)
+    {
+      if ((fs->is_reference==3) && (fs->is_long_term==0) && (fs->frame->pic_num == picNumX))
+      {
+        unmark_for_reference(p_Dpb->p_Vid, fs);
+        return;
+      }
+      continue;
+    }
+
+    // top field: short-term reference with matching picNum
+    if ((fs->is_reference & 1) && (!(fs->is_long_term & 1)) && (fs->top_field->pic_num == picNumX))
+    {
+      fs->top_field->used_for_reference = 0;
+      fs->is_reference &= 2;
+      if (fs->is_used == 3)
+      {
+        fs->frame->used_for_reference = 0;
+      }
+      return;
+    }
+
+    // bottom field: short-term reference with matching picNum
+    if ((fs->is_reference & 2) && (!(fs->is_long_term & 2)) && (fs->bottom_field->pic_num == picNumX))
+    {
+      fs->bottom_field->used_for_reference = 0;
+      fs->is_reference &= 1;
+      if (fs->is_used == 3)
+      {
+        fs->frame->used_for_reference = 0;
+      }
+      return;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Adaptive Memory Management: mark one long-term picture as unused
+ *    for reference.
+ *
+ *    For frames every store with is_reference == 3, is_long_term == 3 and
+ *    a matching long_term_pic_num is unmarked (the scan continues after a
+ *    match). For fields only the matching field is unmarked, the combined
+ *    frame is unmarked too when the store holds both fields, and the
+ *    search stops at the first match.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer
+ * \param p
+ *    current picture (only its structure is used)
+ * \param long_term_pic_num
+ *    long-term picture number to unmark
+ ************************************************************************
+ */
+static void mm_unmark_long_term_for_reference(DecodedPictureBuffer *p_Dpb, StorablePicture *p, int long_term_pic_num)
+{
+  unsigned n;
+
+  for (n = 0; n < p_Dpb->ltref_frames_in_buffer; n++)
+  {
+    FrameStore *fs = p_Dpb->fs_ltref[n];
+
+    if (p->structure == FRAME)
+    {
+      if ((fs->is_reference==3) && (fs->is_long_term==3) && (fs->frame->long_term_pic_num == long_term_pic_num))
+      {
+        unmark_for_long_term_reference(fs);
+      }
+      continue;
+    }
+
+    // top field: long-term reference with matching number
+    if ((fs->is_reference & 1) && (fs->is_long_term & 1) && (fs->top_field->long_term_pic_num == long_term_pic_num))
+    {
+      fs->top_field->used_for_reference = 0;
+      fs->top_field->is_long_term = 0;
+      fs->is_reference &= 2;
+      fs->is_long_term &= 2;
+      if (fs->is_used == 3)
+      {
+        fs->frame->used_for_reference = 0;
+        fs->frame->is_long_term = 0;
+      }
+      return;
+    }
+
+    // bottom field: long-term reference with matching number
+    if ((fs->is_reference & 2) && (fs->is_long_term & 2) && (fs->bottom_field->long_term_pic_num == long_term_pic_num))
+    {
+      fs->bottom_field->used_for_reference = 0;
+      fs->bottom_field->is_long_term = 0;
+      fs->is_reference &= 1;
+      fs->is_long_term &= 1;
+      if (fs->is_used == 3)
+      {
+        fs->frame->used_for_reference = 0;
+        fs->frame->is_long_term = 0;
+      }
+      return;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark every long-term frame store (frame or complementary field
+ *    pair) with the given long_term_frame_idx as unused for reference.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer
+ * \param long_term_frame_idx
+ *    long-term frame index to release
+ ************************************************************************
+ */
+static void unmark_long_term_frame_for_reference_by_frame_idx(DecodedPictureBuffer *p_Dpb, int long_term_frame_idx)
+{
+  unsigned n = 0;
+
+  while (n < p_Dpb->ltref_frames_in_buffer)
+  {
+    FrameStore *fs = p_Dpb->fs_ltref[n];
+
+    if (fs->long_term_frame_idx == long_term_frame_idx)
+    {
+      unmark_for_long_term_reference(fs);
+    }
+    n++;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release long-term stores that use long_term_frame_idx, except the
+ *    store holding the complementary field of the picture being marked.
+ *
+ *    A store with the given index is unmarked when both of its fields
+ *    are long-term (is_long_term == 3) or when the field of the same
+ *    parity as \a structure is long-term. Otherwise it is kept only if
+ *    it belongs to the same frame:
+ *      mark_current != 0: it is p_Dpb->last_picture and that store's
+ *                         frame_num equals curr_frame_num
+ *      mark_current == 0: its frame_num equals curr_pic_num / 2
+ *
+ * \param p_Vid
+ *    decoder state (DPB, MaxFrameNum)
+ * \param structure
+ *    TOP_FIELD or BOTTOM_FIELD (FRAME is rejected by assert)
+ * \param long_term_frame_idx
+ *    long-term frame index being reassigned
+ * \param mark_current
+ *    non-zero when the current picture itself is being marked
+ * \param curr_frame_num
+ *    frame number of the current picture (used when mark_current != 0)
+ * \param curr_pic_num
+ *    picNumX of the field being marked (used when mark_current == 0);
+ *    negative values are wrapped by 2 * MaxFrameNum
+ ************************************************************************
+ */
+static void unmark_long_term_field_for_reference_by_frame_idx(VideoParameters *p_Vid, PictureStructure structure, int long_term_frame_idx, int mark_current, unsigned curr_frame_num, int curr_pic_num)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  unsigned n;
+  int same_parity;
+
+  assert(structure!=FRAME);
+  if (curr_pic_num<0)
+    curr_pic_num+=(2*p_Vid->MaxFrameNum);
+
+  // is_long_term value of a store whose only long-term field has our parity
+  if (structure == TOP_FIELD)
+    same_parity = 1;
+  else if (structure == BOTTOM_FIELD)
+    same_parity = 2;
+  else
+    return;
+
+  for (n = 0; n < p_Dpb->ltref_frames_in_buffer; n++)
+  {
+    FrameStore *fs = p_Dpb->fs_ltref[n];
+
+    if (fs->long_term_frame_idx != long_term_frame_idx)
+      continue;
+
+    if ((fs->is_long_term == 3) || (fs->is_long_term == same_parity))
+    {
+      unmark_for_long_term_reference(fs);
+      continue;
+    }
+
+    // only the opposite-parity field is long-term: keep it if it is our complementary field
+    if (mark_current)
+    {
+      if ((!p_Dpb->last_picture) || (p_Dpb->last_picture != fs) || (p_Dpb->last_picture->frame_num != curr_frame_num))
+      {
+        unmark_for_long_term_reference(fs);
+      }
+    }
+    else if ((fs->frame_num) != (unsigned)(curr_pic_num/2))
+    {
+      unmark_for_long_term_reference(fs);
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Convert the short-term picture with number picNumX into a long-term
+ *    reference carrying long_term_frame_idx.
+ *
+ *    Frame case: the frame (and both fields, if present) of the first
+ *    matching store are marked and the store's is_long_term becomes 3.
+ *    Field case: the matching top or bottom field is marked, the store's
+ *    is_long_term bit for that parity is set, and the combined frame is
+ *    marked once both bits are set. A warning is printed when no
+ *    matching picture is found, or when a field receives an index that
+ *    differs from the one already assigned to its store.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer
+ * \param p
+ *    current picture (its structure selects frame or field handling)
+ * \param long_term_frame_idx
+ *    index to assign
+ * \param picNumX
+ *    picNum of the short-term picture to convert
+ ************************************************************************
+ */
+static void mark_pic_long_term(DecodedPictureBuffer *p_Dpb, StorablePicture* p, int long_term_frame_idx, int picNumX)
+{
+  unsigned n;
+  int add_top, add_bottom;
+
+  if (p->structure == FRAME)
+  {
+    for (n = 0; n < p_Dpb->ref_frames_in_buffer; n++)
+    {
+      FrameStore *fs = p_Dpb->fs_ref[n];
+
+      if (fs->is_reference != 3)
+        continue;
+      if (fs->frame->is_long_term || (fs->frame->pic_num != picNumX))
+        continue;
+
+      fs->frame->long_term_frame_idx = long_term_frame_idx;
+      fs->long_term_frame_idx        = fs->frame->long_term_frame_idx;
+      fs->frame->long_term_pic_num   = long_term_frame_idx;
+      fs->frame->is_long_term        = 1;
+
+      if (fs->top_field && fs->bottom_field)
+      {
+        fs->bottom_field->long_term_frame_idx = long_term_frame_idx;
+        fs->top_field->long_term_frame_idx    = fs->bottom_field->long_term_frame_idx;
+        fs->top_field->long_term_pic_num      = long_term_frame_idx;
+        fs->bottom_field->long_term_pic_num   = long_term_frame_idx;
+
+        fs->bottom_field->is_long_term = 1;
+        fs->top_field->is_long_term    = 1;
+      }
+      fs->is_long_term = 3;
+      return;
+    }
+    printf ("Warning: reference frame for long term marking not found\n");
+    return;
+  }
+
+  // field case: the offset is 1 for the parity of the current picture, 0 for the other
+  add_top    = (p->structure == TOP_FIELD) ? 1 : 0;
+  add_bottom = (p->structure == TOP_FIELD) ? 0 : 1;
+
+  for (n = 0; n < p_Dpb->ref_frames_in_buffer; n++)
+  {
+    FrameStore *fs = p_Dpb->fs_ref[n];
+
+    if ((fs->is_reference & 1) && (!fs->top_field->is_long_term) && (fs->top_field->pic_num == picNumX))
+    {
+      if ((fs->is_long_term) && (fs->long_term_frame_idx != long_term_frame_idx))
+      {
+        printf ("Warning: assigning long_term_frame_idx different from other field\n");
+      }
+
+      fs->top_field->long_term_frame_idx = long_term_frame_idx;
+      fs->long_term_frame_idx            = fs->top_field->long_term_frame_idx;
+      fs->top_field->long_term_pic_num   = 2 * long_term_frame_idx + add_top;
+      fs->top_field->is_long_term        = 1;
+      fs->is_long_term |= 1;
+      if (fs->is_long_term == 3)
+      {
+        fs->frame->is_long_term        = 1;
+        fs->frame->long_term_pic_num   = long_term_frame_idx;
+        fs->frame->long_term_frame_idx = fs->frame->long_term_pic_num;
+      }
+      return;
+    }
+
+    if ((fs->is_reference & 2) && (!fs->bottom_field->is_long_term) && (fs->bottom_field->pic_num == picNumX))
+    {
+      if ((fs->is_long_term) && (fs->long_term_frame_idx != long_term_frame_idx))
+      {
+        printf ("Warning: assigning long_term_frame_idx different from other field\n");
+      }
+
+      fs->bottom_field->long_term_frame_idx = long_term_frame_idx;
+      fs->long_term_frame_idx               = fs->bottom_field->long_term_frame_idx;
+      fs->bottom_field->long_term_pic_num   = 2 * long_term_frame_idx + add_bottom;
+      fs->bottom_field->is_long_term        = 1;
+      fs->is_long_term |= 2;
+      if (fs->is_long_term == 3)
+      {
+        fs->frame->is_long_term        = 1;
+        fs->frame->long_term_pic_num   = long_term_frame_idx;
+        fs->frame->long_term_frame_idx = fs->frame->long_term_pic_num;
+      }
+      return;
+    }
+  }
+  printf ("Warning: reference field for long term marking not found\n");
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Assign a long-term frame index to a short-term picture.
+ *
+ *    Pictures already using long_term_frame_idx are released first. For
+ *    field decoding the parity of the target field is determined by
+ *    searching the short-term list for picNumX (error() is called if it
+ *    is not found) so that its complementary field is not released.
+ *    Finally the target picture is marked as long-term.
+ *
+ * \param p_Vid
+ *    decoder state owning the DPB
+ * \param p
+ *    current picture
+ * \param difference_of_pic_nums_minus1
+ *    difference value from the command, used to derive picNumX
+ * \param long_term_frame_idx
+ *    index to assign
+ ************************************************************************
+ */
+static void mm_assign_long_term_frame_idx(VideoParameters *p_Vid, StorablePicture* p, int difference_of_pic_nums_minus1, int long_term_frame_idx)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  const int picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1);
+
+  if (p->structure != FRAME)
+  {
+    // FRAME doubles as the "not found" marker for the parity search
+    PictureStructure structure = FRAME;
+    unsigned n;
+
+    for (n = 0; n < p_Dpb->ref_frames_in_buffer; n++)
+    {
+      FrameStore *fs = p_Dpb->fs_ref[n];
+
+      if ((fs->is_reference & 1) && (fs->top_field->pic_num == picNumX))
+      {
+        structure = TOP_FIELD;
+        break;
+      }
+      if ((fs->is_reference & 2) && (fs->bottom_field->pic_num == picNumX))
+      {
+        structure = BOTTOM_FIELD;
+        break;
+      }
+    }
+    if (structure==FRAME)
+    {
+      error ("field for long term marking not found",200);
+    }
+
+    unmark_long_term_field_for_reference_by_frame_idx(p_Vid, structure, long_term_frame_idx, 0, 0, picNumX);
+  }
+  else
+  {
+    unmark_long_term_frame_for_reference_by_frame_idx(p_Dpb, long_term_frame_idx);
+  }
+
+  mark_pic_long_term(p_Dpb, p, long_term_frame_idx, picNumX);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Set a new maximum long_term_frame_idx and release every long-term
+ *    store whose index exceeds it.
+ *
+ *    The fs_ltref list itself is not rebuilt by this function.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer
+ * \param max_long_term_frame_idx_plus1
+ *    new maximum index plus one (0 makes the maximum -1)
+ ************************************************************************
+ */
+void mm_update_max_long_term_frame_idx(DecodedPictureBuffer *p_Dpb, int max_long_term_frame_idx_plus1)
+{
+  unsigned n = 0;
+
+  p_Dpb->max_long_term_pic_idx = max_long_term_frame_idx_plus1 - 1;
+
+  // drop stores that no longer fit below the new maximum
+  while (n < p_Dpb->ltref_frames_in_buffer)
+  {
+    FrameStore *fs = p_Dpb->fs_ltref[n];
+
+    if (fs->long_term_frame_idx > p_Dpb->max_long_term_pic_idx)
+    {
+      unmark_for_long_term_reference(fs);
+    }
+    n++;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * Mark all long term reference pictures unused for reference
+ *
+ * Implemented by requesting a max_long_term_frame_idx_plus1 of 0 from
+ * mm_update_max_long_term_frame_idx().
+ *
+ * \param p_Dpb
+ * decoded picture buffer whose long-term pictures are released
+ ************************************************************************
+ */
+static void mm_unmark_all_long_term_for_reference (DecodedPictureBuffer *p_Dpb)
+{
+ mm_update_max_long_term_frame_idx(p_Dpb, 0);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark all short-term reference pictures as unused for reference and
+ *    rebuild the short-term reference list.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer
+ ************************************************************************
+ */
+static void mm_unmark_all_short_term_for_reference (DecodedPictureBuffer *p_Dpb)
+{
+  unsigned int n = 0;
+
+  while (n < p_Dpb->ref_frames_in_buffer)
+  {
+    unmark_for_reference(p_Dpb->p_Vid, p_Dpb->fs_ref[n]);
+    n++;
+  }
+  update_ref_list(p_Dpb);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark the current picture as used for long-term reference.
+ *
+ *    Long-term pictures already using long_term_frame_idx are released
+ *    first (frame or field variant depending on p->structure), then the
+ *    index is stored in p and p is flagged as long-term.
+ *
+ * \param p_Vid
+ *    decoder state
+ * \param p_Dpb
+ *    decoded picture buffer
+ * \param p
+ *    current picture
+ * \param long_term_frame_idx
+ *    index to assign to the current picture
+ ************************************************************************
+ */
+static void mm_mark_current_picture_long_term(VideoParameters *p_Vid, DecodedPictureBuffer *p_Dpb, StorablePicture *p, int long_term_frame_idx)
+{
+  // free the index before handing it to the current picture
+  if (p->structure != FRAME)
+  {
+    unmark_long_term_field_for_reference_by_frame_idx(p_Vid, p->structure, long_term_frame_idx, 1, p->pic_num, 0);
+  }
+  else
+  {
+    unmark_long_term_frame_for_reference_by_frame_idx(p_Dpb, long_term_frame_idx);
+  }
+
+  p->long_term_frame_idx = long_term_frame_idx;
+  p->is_long_term = 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * Perform Adaptive memory control decoded reference picture marking process
+ *
+ * Walks the linked list p->dec_ref_pic_marking_buffer, executes each
+ * memory management control operation (0..6) and frees the list node
+ * afterwards, leaving the list empty. If operation 5 was seen, the
+ * picture numbers and POC values of p (and the matching p_Vid fields)
+ * are reset and the DPB is flushed.
+ *
+ * \param p_Vid
+ * decoder state; last_has_mmco_5, toppoc, bottompoc, framepoc and
+ * ThisPOC may be written
+ * \param p
+ * current picture; must not be IDR and must have
+ * adaptive_ref_pic_buffering_flag set (both checked by assert)
+ ************************************************************************
+ */
+static void adaptive_memory_management(VideoParameters *p_Vid, StorablePicture* p)
+{
+ DecRefPicMarking_t *tmp_drpm;
+ DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+
+ p_Vid->last_has_mmco_5 = 0;
+
+ assert (!p->idr_flag);
+ assert (p->adaptive_ref_pic_buffering_flag);
+
+ while (p->dec_ref_pic_marking_buffer) // consume the command list head-first
+ {
+ tmp_drpm = p->dec_ref_pic_marking_buffer;
+ switch (tmp_drpm->memory_management_control_operation)
+ {
+ case 0: // end marker: must be the last node
+ if (tmp_drpm->Next != NULL)
+ {
+ error ("memory_management_control_operation = 0 not last operation in buffer", 500);
+ }
+ break;
+ case 1: // unmark one short-term picture
+ mm_unmark_short_term_for_reference(p_Dpb, p, tmp_drpm->difference_of_pic_nums_minus1);
+ update_ref_list(p_Dpb);
+ break;
+ case 2: // unmark one long-term picture
+ mm_unmark_long_term_for_reference(p_Dpb, p, tmp_drpm->long_term_pic_num);
+ update_ltref_list(p_Dpb);
+ break;
+ case 3: // turn a short-term picture into a long-term one (both lists change)
+ mm_assign_long_term_frame_idx(p_Vid, p, tmp_drpm->difference_of_pic_nums_minus1, tmp_drpm->long_term_frame_idx);
+ update_ref_list(p_Dpb);
+ update_ltref_list(p_Dpb);
+ break;
+ case 4: // set a new maximum long-term frame index
+ mm_update_max_long_term_frame_idx (p_Dpb, tmp_drpm->max_long_term_frame_idx_plus1);
+ update_ltref_list(p_Dpb);
+ break;
+ case 5: // unmark everything; the POC reset and flush happen after the loop
+ mm_unmark_all_short_term_for_reference(p_Dpb);
+ mm_unmark_all_long_term_for_reference(p_Dpb);
+ p_Vid->last_has_mmco_5 = 1;
+ break;
+ case 6: // mark the current picture itself as long-term
+ mm_mark_current_picture_long_term(p_Vid, p_Dpb, p, tmp_drpm->long_term_frame_idx);
+ check_num_ref(p_Dpb);
+ break;
+ default:
+ error ("invalid memory_management_control_operation in buffer", 500);
+ }
+ p->dec_ref_pic_marking_buffer = tmp_drpm->Next; // unlink before freeing the node
+ free (tmp_drpm);
+ }
+ if ( p_Vid->last_has_mmco_5 )
+ {
+ p->pic_num = p->frame_num = 0;
+
+ switch (p->structure)
+ {
+ case TOP_FIELD:
+ {
+ p->poc = p->top_poc = p_Vid->toppoc =0;
+ break;
+ }
+ case BOTTOM_FIELD:
+ {
+ p->poc = p->bottom_poc = p_Vid->bottompoc = 0;
+ break;
+ }
+ case FRAME:
+ {
+ p->top_poc -= p->poc; // rebase both field POCs relative to the old frame POC
+ p->bottom_poc -= p->poc;
+
+ p_Vid->toppoc = p->top_poc;
+ p_Vid->bottompoc = p->bottom_poc;
+
+ p->poc = imin (p->top_poc, p->bottom_poc);
+ p_Vid->framepoc = p->poc;
+ break;
+ }
+ }
+ p_Vid->ThisPOC = p->poc;
+ flush_dpb(p_Vid);
+ }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * Store a picture in DPB. This includes checking for space in DPB and
+ * flushing frames.
+ * If we received a frame, we need to check for a new store, if we
+ * got a field, check if it's the second field of an already allocated
+ * store.
+ *
+ * \param p_Vid
+ * image decoding parameters for current picture
+ * \param p
+ * Picture to be stored
+ *
+ ************************************************************************
+ */
+
+void store_picture_in_dpb(VideoParameters *p_Vid, StorablePicture* p)
+{
+ DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+ unsigned i;
+ int poc, pos;
+ // picture error concealment
+
+ // diagnostics
+ //printf ("Storing (%s) non-ref pic with frame_num #%d\n", (p->type == FRAME)?"FRAME":(p->type == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", p->pic_num);
+ // if frame, check for new store,
+ assert (p!=NULL);
+
+ p_Vid->last_has_mmco_5=0;
+ p_Vid->last_pic_bottom_field = (p->structure == BOTTOM_FIELD);
+
+ if (p->idr_flag)
+ {
+ idr_memory_management(p_Vid, p);
+ // picture error concealment
+ memset(p_Vid->pocs_in_dpb, 0, sizeof(int)*100);
+ }
+ else
+ {
+ // adaptive memory management
+ if (p->used_for_reference && (p->adaptive_ref_pic_buffering_flag))
+ adaptive_memory_management(p_Vid, p);
+ }
+
+ if ((p->structure==TOP_FIELD)||(p->structure==BOTTOM_FIELD))
+ {
+ // check for frame store with same pic_number
+ if (p_Dpb->last_picture)
+ {
+ if ((int)p_Dpb->last_picture->frame_num == p->pic_num)
+ {
+ if (((p->structure==TOP_FIELD)&&(p_Dpb->last_picture->is_used==2))||((p->structure==BOTTOM_FIELD)&&(p_Dpb->last_picture->is_used==1)))
+ {
+ if ((p->used_for_reference && (p_Dpb->last_picture->is_orig_reference!=0))||
+ (!p->used_for_reference && (p_Dpb->last_picture->is_orig_reference==0)))
+ {
+ insert_picture_in_dpb(p_Vid, p_Dpb->last_picture, p);
+ update_ref_list(p_Dpb);
+ update_ltref_list(p_Dpb);
+ dump_dpb(p_Dpb);
+ p_Dpb->last_picture = NULL;
+ return;
+ }
+ }
+ }
+ }
+ }
+
+ // this is a frame or a field which has no stored complementary field
+
+ // sliding window, if necessary
+ if ((!p->idr_flag)&&(p->used_for_reference && (!p->adaptive_ref_pic_buffering_flag)))
+ {
+ sliding_window_memory_management(p_Dpb, p);
+ }
+
+ // first try to remove unused frames
+ if (p_Dpb->used_size==p_Dpb->size)
+ {
+ remove_unused_frame_from_dpb(p_Vid, p_Dpb);
+ }
+
+ // then output frames until one can be removed
+ while (p_Dpb->used_size == p_Dpb->size)
+ {
+ // non-reference frames may be output directly
+ if (!p->used_for_reference)
+ {
+ get_smallest_poc(p_Dpb, &poc, &pos);
+ if ((-1==pos) || (p->poc < poc))
+ {
+ direct_output(p_Vid, p);
+ return;
+ }
+ }
+ // flush a frame
+ output_one_frame_from_dpb(p_Vid);
+ }
+
+ // check for duplicate frame number in short term reference buffer
+ if ((p->used_for_reference)&&(!p->is_long_term))
+ {
+ for (i=0; i<p_Dpb->ref_frames_in_buffer; i++)
+ {
+ if (p_Dpb->fs_ref[i]->frame_num == p->frame_num)
+ {
+ //error("duplicate frame_num in short-term reference picture buffer", 500);
+ //printf("warning");
+ //return;
+ }
+ }
+
+ }
+ // store at end of buffer
+ insert_picture_in_dpb(p_Vid, p_Dpb->fs[p_Dpb->used_size],p);
+
+ // picture error concealment
+ if (p->idr_flag)
+ {
+ p_Vid->earlier_missing_poc = 0;
+ }
+
+ if (p->structure != FRAME)
+ {
+ p_Dpb->last_picture = p_Dpb->fs[p_Dpb->used_size];
+ }
+ else
+ {
+ p_Dpb->last_picture = NULL;
+ }
+
+ p_Dpb->used_size++;
+
+ update_ref_list(p_Dpb);
+ update_ltref_list(p_Dpb);
+
+ check_num_ref(p_Dpb);
+
+ dump_dpb(p_Dpb);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Insert the picture into a frame store of the DPB.
+ *    A frame takes over the whole store and its field views are generated
+ *    with dpb_split_field(). A field is added to the store; when both
+ *    parities are then present the frame view is generated with
+ *    dpb_combine_field(), otherwise the store takes the field's POC and
+ *    the field's reference ids are generated.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param fs
+ *    FrameStore into which the picture will be inserted
+ * \param p
+ *    StorablePicture to be inserted
+ *
+ ************************************************************************
+ */
+static void insert_picture_in_dpb(VideoParameters *p_Vid, FrameStore* fs, StorablePicture* p)
+{
+  assert (p!=NULL);
+  assert (fs!=NULL);
+  switch (p->structure)
+  {
+  case FRAME:
+    fs->frame = p;
+    fs->is_used = 3;
+    if (p->used_for_reference)
+    {
+      fs->is_reference = 3;
+      fs->is_orig_reference = 3;
+      if (p->is_long_term)
+      {
+        fs->is_long_term = 3;
+        fs->long_term_frame_idx = p->long_term_frame_idx;
+      }
+    }
+    // generate field views
+    dpb_split_field(p_Vid, fs);
+    break;
+  case TOP_FIELD:
+  case BOTTOM_FIELD:
+    {
+      // bit 0 of the is_* masks stands for the top field, bit 1 for the bottom field
+      const int parity_bit = (p->structure == TOP_FIELD) ? 1 : 2;
+
+      if (parity_bit == 1)
+        fs->top_field = p;
+      else
+        fs->bottom_field = p;
+
+      fs->is_used |= parity_bit;
+      if (p->used_for_reference)
+      {
+        fs->is_reference |= parity_bit;
+        fs->is_orig_reference |= parity_bit;
+        if (p->is_long_term)
+        {
+          fs->is_long_term |= parity_bit;
+          fs->long_term_frame_idx = p->long_term_frame_idx;
+        }
+      }
+      if (fs->is_used == 3)
+      {
+        // both fields present: generate frame view
+        dpb_combine_field(p_Vid, fs);
+        fs->frame->time_code = p->time_code;
+      }
+      else
+      {
+        fs->poc = p->poc;
+        gen_field_ref_ids(p);
+      }
+    }
+    break;
+  default:
+    // any other structure value: only the common fields below are updated
+    break;
+  }
+  fs->frame_num = p->pic_num;
+  fs->recovery_frame = p->recovery_frame;
+
+  fs->is_output = p->is_output;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Tell whether a frame store, or any picture held by it, is used
+ *    for reference.
+ *
+ * \param fs
+ *    FrameStore to inspect
+ * \return
+ *    1 if the store or one of its pictures is marked as reference,
+ *    0 otherwise
+ ************************************************************************
+ */
+static int is_used_for_reference(FrameStore* fs)
+{
+  // store-level marking
+  if (fs->is_reference)
+    return 1;
+
+  // complete frame
+  if ((fs->is_used == 3) && fs->frame->used_for_reference)
+    return 1;
+
+  // top field
+  if ((fs->is_used & 1) && fs->top_field && fs->top_field->used_for_reference)
+    return 1;
+
+  // bottom field
+  if ((fs->is_used & 2) && fs->bottom_field && fs->bottom_field->used_for_reference)
+    return 1;
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Tell whether the frame, or either field, held by a frame store is a
+ *    short-term reference (used for reference and not long-term).
+ *
+ * \param fs
+ *    FrameStore to inspect
+ * \return
+ *    1 if such a picture is found, 0 otherwise
+ ************************************************************************
+ */
+static int is_short_term_reference(FrameStore* fs)
+{
+  StorablePicture *pic;
+
+  if (fs->is_used == 3)   // both parities present: look at the frame
+  {
+    pic = fs->frame;
+    if (pic->used_for_reference && !pic->is_long_term)
+      return 1;
+  }
+
+  pic = fs->top_field;
+  if ((fs->is_used & 1) && pic && pic->used_for_reference && !pic->is_long_term)
+    return 1;
+
+  pic = fs->bottom_field;
+  if ((fs->is_used & 2) && pic && pic->used_for_reference && !pic->is_long_term)
+    return 1;
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Tell whether the frame, or either field, held by a frame store is a
+ *    long-term reference (used for reference and marked long-term).
+ *
+ * \param fs
+ *    FrameStore to inspect
+ * \return
+ *    1 if such a picture is found, 0 otherwise
+ ************************************************************************
+ */
+static int is_long_term_reference(FrameStore* fs)
+{
+  StorablePicture *pic;
+
+  if (fs->is_used == 3)   // both parities present: look at the frame
+  {
+    pic = fs->frame;
+    if (pic->used_for_reference && pic->is_long_term)
+      return 1;
+  }
+
+  pic = fs->top_field;
+  if ((fs->is_used & 1) && pic && pic->used_for_reference && pic->is_long_term)
+    return 1;
+
+  pic = fs->bottom_field;
+  if ((fs->is_used & 2) && pic && pic->used_for_reference && pic->is_long_term)
+    return 1;
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Remove one frame store from the DPB: free the pictures it holds,
+ *    clear its state flags and move the emptied store behind the last
+ *    used entry.
+ *
+ * \param p_Vid
+ *    video parameters (owner of the DPB)
+ * \param pos
+ *    index of the frame store in p_Dpb->fs
+ ************************************************************************
+ */
+static void remove_frame_from_dpb(VideoParameters *p_Vid, int pos)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  FrameStore *victim = p_Dpb->fs[pos];
+  unsigned idx;
+
+  // release whatever pictures the store currently holds
+  if (victim->is_used == 3)
+  {
+    free_storable_picture(p_Vid, victim->frame);
+    free_storable_picture(p_Vid, victim->top_field);
+    free_storable_picture(p_Vid, victim->bottom_field);
+    victim->frame=NULL;
+    victim->top_field=NULL;
+    victim->bottom_field=NULL;
+  }
+  else if (victim->is_used == 2)
+  {
+    free_storable_picture(p_Vid, victim->bottom_field);
+    victim->bottom_field=NULL;
+  }
+  else if (victim->is_used == 1)
+  {
+    free_storable_picture(p_Vid, victim->top_field);
+    victim->top_field=NULL;
+  }
+  else if (victim->is_used != 0)
+  {
+    error("invalid frame store type",500);
+  }
+
+  victim->is_used = 0;
+  victim->is_long_term = 0;
+  victim->is_reference = 0;
+  victim->is_orig_reference = 0;
+
+  // shift the following stores down by one and park the empty one at the end
+  for (idx = pos; idx < p_Dpb->used_size-1; idx++)
+  {
+    p_Dpb->fs[idx] = p_Dpb->fs[idx+1];
+  }
+  p_Dpb->fs[p_Dpb->used_size-1] = victim;
+  p_Dpb->used_size--;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Find the smallest POC among the frame stores in the DPB that have
+ *    not been output yet.
+ *
+ * \param p_Dpb
+ *    decoded picture buffer to search
+ * \param poc
+ *    receives the smallest POC found (INT_MAX if there is none)
+ * \param pos
+ *    receives the index of that frame store (-1 if there is none)
+ ************************************************************************
+ */
+static void get_smallest_poc(DecodedPictureBuffer *p_Dpb, int *poc,int * pos)
+{
+  unsigned idx;
+
+  if (p_Dpb->used_size<1)
+  {
+    error("Cannot determine smallest POC, DPB empty.",150);
+  }
+
+  *pos = -1;
+  *poc = INT_MAX;
+
+  for (idx = 0; idx < p_Dpb->used_size; idx++)
+  {
+    FrameStore *cand = p_Dpb->fs[idx];
+
+    // only stores still waiting for output take part
+    if (!cand->is_output && (cand->poc < *poc))
+    {
+      *poc = cand->poc;
+      *pos = idx;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Remove the first frame store in the DPB that has already been output
+ *    and is no longer used for reference.
+ *
+ * \param p_Vid
+ *    video parameters, passed on to remove_frame_from_dpb()
+ * \param p_Dpb
+ *    decoded picture buffer to scan
+ * \return
+ *    1 if a frame store was removed, 0 if none qualified
+ ************************************************************************
+ */
+static int remove_unused_frame_from_dpb(VideoParameters *p_Vid, DecodedPictureBuffer *p_Dpb)
+{
+  unsigned idx = 0;
+
+  while (idx < p_Dpb->used_size)
+  {
+    FrameStore *cand = p_Dpb->fs[idx];
+
+    if (cand->is_output && !is_used_for_reference(cand))
+    {
+      remove_frame_from_dpb(p_Vid, idx);
+      return 1;
+    }
+    idx++;
+  }
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output the not-yet-output picture with the smallest POC stored in
+ *    the DPB. Its frame store is removed afterwards unless it is still
+ *    used for reference.
+ *
+ * \param p_Vid
+ *    video parameters (owner of the DPB)
+ ************************************************************************
+ */
+static void output_one_frame_from_dpb(VideoParameters *p_Vid)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  int lowest_poc, lowest_pos;
+
+  if (p_Dpb->used_size<1)
+  {
+    error("Cannot output frame, DPB empty.",150);
+  }
+
+  // pick the next picture in output order
+  get_smallest_poc(p_Dpb, &lowest_poc, &lowest_pos);
+
+  if (-1 == lowest_pos)
+  {
+    error("no frames for output available", 150);
+  }
+
+  write_stored_frame(p_Vid, p_Dpb->fs[lowest_pos]);
+
+  // note: output POCs are not checked for ascending order
+  // (last_output_poc >= poc is accepted silently)
+  p_Dpb->last_output_poc = lowest_poc;
+
+  // free frame store and move empty store to end of buffer
+  if (!is_used_for_reference(p_Dpb->fs[lowest_pos]))
+  {
+    remove_frame_from_dpb(p_Vid, lowest_pos);
+  }
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output all stored pictures. Should be called to empty the buffer.
+ *    Every frame store is unmarked for reference first; stores that were
+ *    already output are removed, the remaining ones are output one by
+ *    one, and last_output_poc is reset to INT_MIN.
+ *
+ * \param p_Vid
+ *    video parameters (owner of the DPB)
+ ************************************************************************
+ */
+void flush_dpb(VideoParameters *p_Vid)
+{
+  DecodedPictureBuffer *p_Dpb = p_Vid->p_Dpb;
+  unsigned idx;
+
+  // nothing in the buffer is needed for reference any more
+  for (idx = 0; idx < p_Dpb->used_size; ++idx)
+  {
+    unmark_for_reference (p_Vid, p_Dpb->fs[idx]);
+  }
+
+  // drop everything that has been output already
+  while (remove_unused_frame_from_dpb(p_Vid, p_Dpb))
+  {
+  }
+
+  // output the remaining frames in POC order
+  while (p_Dpb->used_size != 0)
+  {
+    output_one_frame_from_dpb(p_Vid);
+  }
+
+  p_Dpb->last_output_poc = INT_MIN;
+}
+
+
+// For every 4x4 block of a field picture: translate the LIST_0 / LIST_1
+// reference indices into reference ids through the ref_pic_num table of the
+// block's slice (id 0 when the index is negative) and flag the block in
+// field_frame.
+static void gen_field_ref_ids(StorablePicture *p)
+{
+  int bx, by;
+  int idx_l0, idx_l1;
+
+  // every block is handled independently, so rows are walked in the outer loop
+  for (by = 0; by < p->size_y/4; by++)
+  {
+    for (bx = 0; bx < p->size_x/4; bx++)
+    {
+      idx_l0 = p->motion.motion[LIST_0][by][bx].ref_idx;
+      idx_l1 = p->motion.motion[LIST_1][by][bx].ref_idx;
+
+      //! association with id already known for fields.
+      p->motion.motion[LIST_0][by][bx].ref_id = (idx_l0>=0)? p->ref_pic_num[p->slice_id[by>>2][bx>>2]][LIST_0][idx_l0] : 0;
+      p->motion.motion[LIST_1][by][bx].ref_id = (idx_l1>=0)? p->ref_pic_num[p->slice_id[by>>2][bx>>2]][LIST_1][idx_l1] : 0;
+
+      p->motion.field_frame[by][bx]=1;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * Split a stored frame into field views.
+ * Unless frame_mbs_only_flag is set, top and bottom field pictures are
+ * allocated and filled from the even and odd rows of the frame; the
+ * reference ids (ref_id) of the frame's motion data and the
+ * field_frame map are filled in as well.
+ *
+ * \param p_Vid
+ * video parameters, passed through to alloc_storable_picture()
+ * \param fs
+ * FrameStore whose frame member is split; fs->frame is dereferenced
+ * unconditionally
+ ************************************************************************
+ */
+void dpb_split_field(VideoParameters *p_Vid, FrameStore *fs)
+{
+ int i, j, ii, jj, jj4;
+ int idiv,jdiv;
+ int currentmb;
+ int dummylist0, dummylist1;
+ // used below as the row stride when computing currentmb
+ int twosz16 = 2 * (fs->frame->size_x >> 4);
+ StorablePicture *fs_top, *fs_btm;
+ StorablePicture *frame = fs->frame;
+
+
+ fs->poc = frame->poc;
+
+ if (!frame->frame_mbs_only_flag)
+ {
+ // allocate the two field pictures (note: allocated with the frame's dimensions)
+ fs_top = fs->top_field = alloc_storable_picture(p_Vid, TOP_FIELD, frame->size_x, frame->size_y, frame->size_x_cr, frame->size_y_cr);
+ fs_btm = fs->bottom_field = alloc_storable_picture(p_Vid, BOTTOM_FIELD, frame->size_x, frame->size_y, frame->size_x_cr, frame->size_y_cr);
+
+ // top field: even luma rows of the frame
+ for (i = 0; i < (frame->size_y>>1); i++)
+ {
+ memcpy(fs_top->imgY->img[i], frame->imgY->img[i*2], frame->size_x*sizeof(imgpel));
+ }
+
+ // top field: even rows of both chroma planes
+ for (i = 0; i< (frame->size_y_cr>>1); i++)
+ {
+ memcpy(fs_top->imgUV[0]->img[i], frame->imgUV[0]->img[i*2], frame->size_x_cr*sizeof(imgpel));
+ memcpy(fs_top->imgUV[1]->img[i], frame->imgUV[1]->img[i*2], frame->size_x_cr*sizeof(imgpel));
+ }
+
+ // bottom field: odd luma rows of the frame
+ for (i = 0; i < (frame->size_y>>1); i++)
+ {
+ memcpy(fs_btm->imgY->img[i], frame->imgY->img[i*2 + 1], frame->size_x*sizeof(imgpel));
+ }
+
+ // bottom field: odd rows of both chroma planes
+ for (i = 0; i < (frame->size_y_cr>>1); i++)
+ {
+ memcpy(fs_btm->imgUV[0]->img[i], frame->imgUV[0]->img[i*2 + 1], frame->size_x_cr*sizeof(imgpel));
+ memcpy(fs_btm->imgUV[1]->img[i], frame->imgUV[1]->img[i*2 + 1], frame->size_x_cr*sizeof(imgpel));
+ }
+
+ // propagate POCs and reference marking from the frame to both fields
+ fs_top->poc = frame->top_poc;
+ fs_btm->poc = frame->bottom_poc;
+
+ fs_top->frame_poc = frame->frame_poc;
+
+ fs_top->bottom_poc = fs_btm->bottom_poc = frame->bottom_poc;
+ fs_top->top_poc = fs_btm->top_poc = frame->top_poc;
+ fs_btm->frame_poc = frame->frame_poc;
+
+ fs_top->used_for_reference = fs_btm->used_for_reference
+ = frame->used_for_reference;
+ fs_top->is_long_term = fs_btm->is_long_term
+ = frame->is_long_term;
+ fs->long_term_frame_idx = fs_top->long_term_frame_idx
+ = fs_btm->long_term_frame_idx
+ = frame->long_term_frame_idx;
+
+ fs_top->coded_frame = fs_btm->coded_frame = 1;
+ fs_top->mb_aff_frame_flag = fs_btm->mb_aff_frame_flag
+ = frame->mb_aff_frame_flag;
+
+ // cross-link frame, top field and bottom field
+ frame->top_field = fs_top;
+ frame->bottom_field = fs_btm;
+
+ fs_top->bottom_field = fs_btm;
+ fs_top->frame = frame;
+ fs_btm->top_field = fs_top;
+ fs_btm->frame = frame;
+
+ fs_top->chroma_format_idc = fs_btm->chroma_format_idc = frame->chroma_format_idc;
+
+ //store reference picture index
+ // The top field takes the frame's tables at list offset 2, the bottom field
+ // those at offset 4. 66 entries are copied starting at LIST_0; presumably
+ // this spans LIST_0 and LIST_1 (33 entries each, cf. the commented-out
+ // copies) -- TODO confirm against the ref_pic_num declaration.
+ for (j=0; j<=frame->max_slice_id; j++)
+ {
+ memcpy(&fs_top->ref_pic_num[j][LIST_0][0], &frame->ref_pic_num[j][2 + LIST_0][0], 66 * sizeof(h264_ref_t));
+ //memcpy(&fs_top->ref_pic_num[j][LIST_1][0], &frame->ref_pic_num[j][2 + LIST_1][0], 33 * sizeof(int64));
+ memcpy(&fs_btm->ref_pic_num[j][LIST_0][0], &frame->ref_pic_num[j][4 + LIST_0][0], 66 * sizeof(h264_ref_t));
+ //memcpy(&fs_btm->ref_pic_num[j][LIST_1][0], &frame->ref_pic_num[j][4 + LIST_1][0], 33 * sizeof(int64));
+ }
+ }
+ else
+ {
+ // frame_mbs_only_flag set: no field pictures are created
+ fs_top=NULL;
+ fs_btm=NULL;
+ frame->top_field=NULL;
+ frame->bottom_field=NULL;
+ }
+
+ if (!frame->mb_aff_frame_flag)
+ {
+ // TODO: benski> this part of the function is kinda slow and the compiler
+ // isn't too good at optimizing it
+ // can probably optimize by using CMOV eax, -1 in the inner loop
+ // note: this ii/jj pair shadows the function-scope ii/jj
+ int ii, jj;
+ PicMotionParams *motion = &frame->motion;
+ // Walk the motion array in 4x4 groups of blocks; one slice_id entry is read
+ // per group. ref_id becomes the ref_pic_num entry selected by ref_idx, or -1
+ // when ref_idx is negative.
+ for (j = 0; (j < frame->size_y >> 2) ; j+=4)
+ {
+ const short *slicej = frame->slice_id[j>>2];
+ for (jj = 0;jj<4;jj++)
+ {
+ PicMotion *ref0=frame->motion.motion[LIST_0][j+jj];
+ PicMotion *ref1=frame->motion.motion[LIST_1][j+jj];
+ for (i = 0 ; i < (frame->size_x >> 2) ; i+=4)
+ {
+ short slice = slicej[i>>2];
+ const h264_ref_t *ref_pic_num0 = frame->ref_pic_num[slice][LIST_0];
+ const h264_ref_t *ref_pic_num1 = frame->ref_pic_num[slice][LIST_1];
+ for (ii=0;ii<4;ii++)
+ {
+ dummylist0 = ref0[i+ii].ref_idx;
+ ref0[i+ii].ref_id = (dummylist0>=0)? ref_pic_num0[dummylist0] : -1;
+ dummylist1 = ref1[i+ii].ref_idx;
+ ref1[i+ii].ref_id = (dummylist1>=0)? ref_pic_num1[dummylist1] : -1;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // mb_aff_frame_flag set: the tables used depend on mb_field[currentmb]
+ for (j = 0; (j < frame->size_y >> 2) ; j++)
+ {
+ jdiv = j >> 2;
+ for (i = 0 ; i < (frame->size_x >> 2) ; i++)
+ {
+ idiv = (i >> 2);
+ // index into mb_field for the block at (jdiv, idiv)
+ currentmb = twosz16*(jdiv >> 1)+ (idiv)*2 + (jdiv & 0x01);
+
+ if (frame->motion.mb_field[currentmb])
+ {
+ // list_offset is the lowest bit of currentmb; it selects list offset 2 or 4
+ int list_offset = currentmb&1;
+ dummylist0 = frame->motion.motion[LIST_0][j][i].ref_idx;
+ dummylist1 = frame->motion.motion[LIST_1][j][i].ref_idx;
+ //! association with id already known for fields.
+ frame->motion.field_references[2*list_offset ][j][i]= (dummylist0>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_0 + list_offset*2 + 2][dummylist0] : 0;
+ frame->motion.field_references[2*list_offset+1][j][i]= (dummylist1>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_1 + list_offset*2 + 2][dummylist1] : 0;
+ //! need to make association with frames
+ frame->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->frm_ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_0 + list_offset*2 + 2][dummylist0] : 0;
+ frame->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->frm_ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_1 + list_offset*2 + 2][dummylist1] : 0;
+
+ }
+ else
+ {
+ // same lookup as in the non-MBAFF branch above (-1 for a negative ref_idx)
+ dummylist0 = frame->motion.motion[LIST_0][j][i].ref_idx;
+ dummylist1 = frame->motion.motion[LIST_1][j][i].ref_idx;
+ frame->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_0][dummylist0] : -1;
+ frame->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->ref_pic_num[frame->slice_id[jdiv][idiv]][LIST_1][dummylist1] : -1;
+ }
+ }
+ }
+ }
+
+ if (!frame->frame_mbs_only_flag)
+ {
+ if (frame->mb_aff_frame_flag)
+ {
+ PicMotionParams *frm_motion = &frame->motion;
+ PicMotionParams *top_motion = &fs_top->motion;
+ PicMotionParams *btm_motion = &fs_btm->motion;
+ for (j=0 ; j< (frame->size_y >> 3); j++)
+ {
+ // field row j reads frame row jj for the top field and jj4 for the bottom field
+ jj = (j >> 2)*8 + (j & 0x03);
+ jj4 = jj + 4;
+ jdiv = (j >> 1);
+ for (i=0 ; i < (frame->size_x>>2); i++)
+ {
+ idiv = (i >> 2);
+
+ currentmb = twosz16*(jdiv >> 1)+ (idiv)*2 + (jdiv & 0x01);
+ // Assign field mvs attached to MB-Frame buffer to the proper buffer
+ if (frm_motion->mb_field[currentmb])
+ {
+ btm_motion->field_frame[j][i] = top_motion->field_frame[j][i]=1;
+ frm_motion->field_frame[2*j][i] = frm_motion->field_frame[2*j+1][i]=1;
+
+ btm_motion->motion[LIST_0][j][i].mv[0] = frm_motion->motion[LIST_0][jj4][i].mv[0];
+ btm_motion->motion[LIST_0][j][i].mv[1] = frm_motion->motion[LIST_0][jj4][i].mv[1];
+ btm_motion->motion[LIST_1][j][i].mv[0] = frm_motion->motion[LIST_1][jj4][i].mv[0];
+ btm_motion->motion[LIST_1][j][i].mv[1] = frm_motion->motion[LIST_1][jj4][i].mv[1];
+ btm_motion->motion[LIST_0][j][i].ref_idx = frm_motion->motion[LIST_0][jj4][i].ref_idx;
+ btm_motion->motion[LIST_1][j][i].ref_idx = frm_motion->motion[LIST_1][jj4][i].ref_idx;
+ btm_motion->motion[LIST_0][j][i].ref_id = frm_motion->field_references[2][jj4][i];
+ btm_motion->motion[LIST_1][j][i].ref_id = frm_motion->field_references[3][jj4][i];
+
+
+ top_motion->motion[LIST_0][j][i].mv[0] = frm_motion->motion[LIST_0][jj][i].mv[0];
+ top_motion->motion[LIST_0][j][i].mv[1] = frm_motion->motion[LIST_0][jj][i].mv[1];
+ top_motion->motion[LIST_1][j][i].mv[0] = frm_motion->motion[LIST_1][jj][i].mv[0];
+ top_motion->motion[LIST_1][j][i].mv[1] = frm_motion->motion[LIST_1][jj][i].mv[1];
+ top_motion->motion[LIST_0][j][i].ref_idx = frm_motion->motion[LIST_0][jj][i].ref_idx;
+ top_motion->motion[LIST_1][j][i].ref_idx = frm_motion->motion[LIST_1][jj][i].ref_idx;
+ top_motion->motion[LIST_0][j][i].ref_id = frm_motion->field_references[0][jj][i];
+ top_motion->motion[LIST_1][j][i].ref_id = frm_motion->field_references[1][jj][i];
+ }
+ }
+ }
+ }
+
+ //! Generate field MVs from Frame MVs
+ for (j=0 ; j < (frame->size_y >> 3) ; j++)
+ {
+ // RSD() is a macro defined elsewhere; jj/ii pick the frame block whose motion is copied
+ jj = 2* RSD(j);
+ jdiv = (j >> 1);
+ for (i=0 ; i < (frame->size_x >> 2) ; i++)
+ {
+ ii = RSD(i);
+ idiv = (i >> 2);
+
+ currentmb = twosz16 * (jdiv >> 1)+ (idiv)*2 + (jdiv & 0x01);
+
+ // blocks not already handled as field macroblocks above
+ if (!frame->mb_aff_frame_flag || !frame->motion.mb_field[currentmb])
+ {
+ frame->motion.field_frame[2*j+1][i] = frame->motion.field_frame[2*j][i]=0;
+
+ fs_top->motion.field_frame[j][i] = fs_btm->motion.field_frame[j][i] = 0;
+
+ // both fields receive the same motion vector, taken from frame block (jj, ii)
+ fs_top->motion.motion[LIST_0][j][i].mv[0] = fs_btm->motion.motion[LIST_0][j][i].mv[0] = frame->motion.motion[LIST_0][jj][ii].mv[0];
+ fs_top->motion.motion[LIST_0][j][i].mv[1] = fs_btm->motion.motion[LIST_0][j][i].mv[1] = frame->motion.motion[LIST_0][jj][ii].mv[1];
+ fs_top->motion.motion[LIST_1][j][i].mv[0] = fs_btm->motion.motion[LIST_1][j][i].mv[0] = frame->motion.motion[LIST_1][jj][ii].mv[0];
+ fs_top->motion.motion[LIST_1][j][i].mv[1] = fs_btm->motion.motion[LIST_1][j][i].mv[1] = frame->motion.motion[LIST_1][jj][ii].mv[1];
+
+ // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
+ // note: when ref_idx is -1 only ref_idx is written; ref_id is left untouched
+ if (frame->motion.motion[LIST_0][jj][ii].ref_idx == -1)
+ fs_top->motion.motion[LIST_0][j][i].ref_idx = fs_btm->motion.motion[LIST_0][j][i].ref_idx = - 1;
+ else
+ {
+ dummylist0=fs_top->motion.motion[LIST_0][j][i].ref_idx = fs_btm->motion.motion[LIST_0][j][i].ref_idx = frame->motion.motion[LIST_0][jj][ii].ref_idx;
+ fs_top->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->top_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_0][dummylist0] : 0;
+ fs_btm->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? frame->bottom_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_0][dummylist0] : 0;
+ }
+
+ if (frame->motion.motion[LIST_1][jj][ii].ref_idx == -1)
+ fs_top->motion.motion[LIST_1][j][i].ref_idx = fs_btm->motion.motion[LIST_1][j][i].ref_idx = - 1;
+ else
+ {
+ dummylist1=fs_top->motion.motion[LIST_1][j][i].ref_idx = fs_btm->motion.motion[LIST_1][j][i].ref_idx = frame->motion.motion[LIST_1][jj][ii].ref_idx;
+
+ fs_top->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->top_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_1][dummylist1] : 0;
+ fs_btm->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? frame->bottom_ref_pic_num[frame->slice_id[jj>>2][ii>>2]][LIST_1][dummylist1] : 0;
+ }
+ }
+ else
+ {
+ frame->motion.field_frame[2*j+1][i] = frame->motion.field_frame[2*j][i]= frame->motion.mb_field[currentmb];
+ }
+ }
+ }
+ }
+ else
+ {
+ // frame_mbs_only_flag set: clear the field_frame map, one byte per 4x4 block
+ memset( &(frame->motion.field_frame[0][0]), 0, (frame->size_y * frame->size_x >> 4) * sizeof(byte));
+ }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate a frame from top and bottom fields,
+ *    YUV components and display information only.
+ *    A new frame picture is allocated in fs->frame, the rows of the two
+ *    fields are interleaved into it (top field on even rows), and POC,
+ *    reference marking, cropping and cross-link fields are set.
+ *
+ * \param p_Vid
+ *    video parameters, passed through to alloc_storable_picture()
+ * \param fs
+ *    FrameStore holding both fields
+ ************************************************************************
+ */
+void dpb_combine_field_yuv(VideoParameters *p_Vid, FrameStore *fs)
+{
+  StorablePicture *top = fs->top_field;
+  StorablePicture *btm = fs->bottom_field;
+  StorablePicture *frm;
+  int row, plane;
+
+  // the frame is twice as tall as one field
+  frm = fs->frame = alloc_storable_picture(p_Vid, FRAME, top->size_x, top->size_y*2, top->size_x_cr, top->size_y_cr*2);
+
+  // luma: interleave field rows
+  for (row = 0; row < top->size_y; row++)
+  {
+    memcpy(frm->imgY->img[row*2],     top->imgY->img[row], top->size_x * sizeof(imgpel)); // top field
+    memcpy(frm->imgY->img[row*2 + 1], btm->imgY->img[row], btm->size_x * sizeof(imgpel)); // bottom field
+  }
+
+  // both chroma planes, same interleaving
+  for (plane = 0; plane < 2; plane++)
+  {
+    for (row = 0; row < top->size_y_cr; row++)
+    {
+      memcpy(frm->imgUV[plane]->img[row*2],     top->imgUV[plane]->img[row], top->size_x_cr*sizeof(imgpel));
+      memcpy(frm->imgUV[plane]->img[row*2 + 1], btm->imgUV[plane]->img[row], btm->size_x_cr*sizeof(imgpel));
+    }
+  }
+
+  // the frame POC is the smaller of the two field POCs
+  fs->poc = frm->poc = frm->frame_poc = imin (top->poc, btm->poc);
+
+  btm->frame_poc = top->frame_poc = frm->poc;
+
+  btm->top_poc = frm->top_poc = top->poc;
+  top->bottom_poc = frm->bottom_poc = btm->poc;
+
+  // the frame carries a marking only if both fields carry it
+  frm->used_for_reference = (top->used_for_reference && btm->used_for_reference );
+  frm->is_long_term = (top->is_long_term && btm->is_long_term );
+
+  if (frm->is_long_term)
+    frm->long_term_frame_idx = fs->long_term_frame_idx;
+
+  frm->top_field = top;
+  frm->bottom_field = btm;
+
+  frm->coded_frame = 0;
+
+  // display information is taken from the top field
+  frm->chroma_format_idc = top->chroma_format_idc;
+  frm->frame_cropping_flag = top->frame_cropping_flag;
+  if (frm->frame_cropping_flag)
+  {
+    frm->frame_cropping_rect_top_offset = top->frame_cropping_rect_top_offset;
+    frm->frame_cropping_rect_bottom_offset = top->frame_cropping_rect_bottom_offset;
+    frm->frame_cropping_rect_left_offset = top->frame_cropping_rect_left_offset;
+    frm->frame_cropping_rect_right_offset = top->frame_cropping_rect_right_offset;
+  }
+
+  top->frame = btm->frame = frm;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * Generate a frame from top and bottom fields.
+ * The picture data is combined by dpb_combine_field_yuv(); afterwards
+ * the frame's ref_pic_num table and motion data (mv, ref_idx, ref_id,
+ * field_frame) are derived from the two fields.
+ *
+ * \param p_Vid
+ * video parameters, passed through to dpb_combine_field_yuv()
+ * \param fs
+ * FrameStore holding both fields; fs->frame is created by the call to
+ * dpb_combine_field_yuv()
+ ************************************************************************
+ */
+void dpb_combine_field(VideoParameters *p_Vid, FrameStore *fs)
+{
+ int i,j, k, jj, jj4;
+ int dummylist0, dummylist1;
+
+ dpb_combine_field_yuv(p_Vid, fs);
+
+
+ //combine field for frame
+ // Frame entry i is built from entry 2*i of each field table: both values are
+ // reduced to a multiple of two with (x/2)*2 and the smaller one is kept.
+ for (j=0; j<=(imax(fs->top_field->max_slice_id, fs->bottom_field->max_slice_id)); j++)
+ {
+ for (k = LIST_0; k <= LIST_1; k++)
+ {
+ for (i=0;i<16;i++)
+ {
+ fs->frame->ref_pic_num[j][k][i]= (h264_ref_t) i64min ((fs->top_field->ref_pic_num[j][k][2*i]/2)*2, (fs->bottom_field->ref_pic_num[j][k][2*i]/2)*2);
+ }
+ }
+ }
+
+ //! Use inference flag to remap mvs/references
+
+ //! Generate Frame parameters from field information.
+ for (j=0 ; j < (fs->top_field->size_y >> 2) ; j++)
+ {
+ // Field row j is written to frame row jj (top field data) and jj4 (bottom
+ // field data): each group of four field rows fills two consecutive
+ // 4-row bands of the frame.
+ jj = 8*(j >> 2) + (j & 0x03);
+ jj4 = jj + 4;
+ for (i=0 ; i< (fs->top_field->size_x >> 2) ; i++)
+ {
+ fs->frame->motion.field_frame[jj][i]= fs->frame->motion.field_frame[jj4][i]=1;
+
+ // top field -> frame row jj
+ fs->frame->motion.motion[LIST_0][jj][i].mv[0] = fs->top_field->motion.motion[LIST_0][j][i].mv[0];
+ fs->frame->motion.motion[LIST_0][jj][i].mv[1] = fs->top_field->motion.motion[LIST_0][j][i].mv[1];
+ fs->frame->motion.motion[LIST_1][jj][i].mv[0] = fs->top_field->motion.motion[LIST_1][j][i].mv[0];
+ fs->frame->motion.motion[LIST_1][jj][i].mv[1] = fs->top_field->motion.motion[LIST_1][j][i].mv[1];
+
+ dummylist0=fs->frame->motion.motion[LIST_0][jj][i].ref_idx = fs->top_field->motion.motion[LIST_0][j][i].ref_idx;
+ dummylist1=fs->frame->motion.motion[LIST_1][jj][i].ref_idx = fs->top_field->motion.motion[LIST_1][j][i].ref_idx;
+
+ //! association with id already known for fields.
+ fs->top_field->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? fs->top_field->ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0;
+ fs->top_field->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? fs->top_field->ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0;
+
+ //! need to make association with frames
+ fs->frame->motion.motion[LIST_0][jj][i].ref_id = (dummylist0>=0)? fs->top_field->frm_ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0;
+ fs->frame->motion.motion[LIST_1][jj][i].ref_id = (dummylist1>=0)? fs->top_field->frm_ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0;
+
+ // bottom field -> frame row jj4
+ fs->frame->motion.motion[LIST_0][jj4][i].mv[0] = fs->bottom_field->motion.motion[LIST_0][j][i].mv[0];
+ fs->frame->motion.motion[LIST_0][jj4][i].mv[1] = fs->bottom_field->motion.motion[LIST_0][j][i].mv[1] ;
+ fs->frame->motion.motion[LIST_1][jj4][i].mv[0] = fs->bottom_field->motion.motion[LIST_1][j][i].mv[0];
+ fs->frame->motion.motion[LIST_1][jj4][i].mv[1] = fs->bottom_field->motion.motion[LIST_1][j][i].mv[1] ;
+
+ dummylist0=fs->frame->motion.motion[LIST_0][jj4][i].ref_idx = fs->bottom_field->motion.motion[LIST_0][j][i].ref_idx;
+ dummylist1=fs->frame->motion.motion[LIST_1][jj4][i].ref_idx = fs->bottom_field->motion.motion[LIST_1][j][i].ref_idx;
+
+ fs->bottom_field->motion.motion[LIST_0][j][i].ref_id = (dummylist0>=0)? fs->bottom_field->ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0;
+ fs->bottom_field->motion.motion[LIST_1][j][i].ref_id = (dummylist1>=0)? fs->bottom_field->ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0;
+
+ //! need to make association with frames
+ // NOTE(review): the fallback here is -1 while the top-field rows above use 0
+ // for a negative ref_idx -- confirm whether the difference is intended.
+ fs->frame->motion.motion[LIST_0][jj4][i].ref_id = (dummylist0>=0)? fs->bottom_field->frm_ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : -1;
+ fs->frame->motion.motion[LIST_1][jj4][i].ref_id = (dummylist1>=0)? fs->bottom_field->frm_ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : -1;
+
+ fs->top_field->motion.field_frame[j][i]=1;
+ fs->bottom_field->motion.field_frame[j][i]=1;
+ }
+ }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for buffering of reference picture reordering
+ *    commands. The list 0 buffers are allocated unless the picture type
+ *    is I_SLICE or SI_SLICE, the list 1 buffers only for B_SLICE;
+ *    buffers that are not needed are set to NULL. Each buffer holds
+ *    num_ref_idx_lX_active + 1 zero-initialised entries.
+ *
+ * \param currSlice
+ *    slice whose reordering buffers are set up
+ ************************************************************************
+ */
+void alloc_ref_pic_list_reordering_buffer(Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  int entries;
+
+  // ---- list 0 ----
+  entries = currSlice->num_ref_idx_l0_active + 1;
+
+  if (p_Vid->type==I_SLICE || p_Vid->type==SI_SLICE)
+  {
+    currSlice->reordering_of_pic_nums_idc_l0 = NULL;
+    currSlice->abs_diff_pic_num_minus1_l0 = NULL;
+    currSlice->long_term_pic_idx_l0 = NULL;
+  }
+  else
+  {
+    currSlice->reordering_of_pic_nums_idc_l0 = calloc(entries,sizeof(int));
+    if (currSlice->reordering_of_pic_nums_idc_l0 == NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: reordering_of_pic_nums_idc_l0");
+
+    currSlice->abs_diff_pic_num_minus1_l0 = calloc(entries,sizeof(int));
+    if (currSlice->abs_diff_pic_num_minus1_l0 == NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l0");
+
+    currSlice->long_term_pic_idx_l0 = calloc(entries,sizeof(int));
+    if (currSlice->long_term_pic_idx_l0 == NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l0");
+  }
+
+  // ---- list 1 ----
+  entries = currSlice->num_ref_idx_l1_active+1;
+
+  if (p_Vid->type!=B_SLICE)
+  {
+    currSlice->reordering_of_pic_nums_idc_l1 = NULL;
+    currSlice->abs_diff_pic_num_minus1_l1 = NULL;
+    currSlice->long_term_pic_idx_l1 = NULL;
+  }
+  else
+  {
+    currSlice->reordering_of_pic_nums_idc_l1 = calloc(entries,sizeof(int));
+    if (currSlice->reordering_of_pic_nums_idc_l1 == NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: reordering_of_pic_nums_idc_l1");
+
+    currSlice->abs_diff_pic_num_minus1_l1 = calloc(entries,sizeof(int));
+    if (currSlice->abs_diff_pic_num_minus1_l1 == NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l1");
+
+    currSlice->long_term_pic_idx_l1 = calloc(entries,sizeof(int));
+    if (currSlice->long_term_pic_idx_l1 == NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l1");
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free memory for buffering of reference picture reordering commands
+ *    and reset all six buffer pointers of the slice to NULL.
+ *
+ * \param currSlice
+ *    slice whose reordering buffers are released
+ ************************************************************************
+ */
+void free_ref_pic_list_reordering_buffer(Slice *currSlice)
+{
+  // free() accepts NULL, so buffers that were never allocated need no guard
+
+  // list 0
+  free(currSlice->reordering_of_pic_nums_idc_l0);
+  free(currSlice->abs_diff_pic_num_minus1_l0);
+  free(currSlice->long_term_pic_idx_l0);
+
+  currSlice->reordering_of_pic_nums_idc_l0 = NULL;
+  currSlice->abs_diff_pic_num_minus1_l0 = NULL;
+  currSlice->long_term_pic_idx_l0 = NULL;
+
+  // list 1
+  free(currSlice->reordering_of_pic_nums_idc_l1);
+  free(currSlice->abs_diff_pic_num_minus1_l1);
+  free(currSlice->long_term_pic_idx_l1);
+
+  currSlice->reordering_of_pic_nums_idc_l1 = NULL;
+  currSlice->abs_diff_pic_num_minus1_l1 = NULL;
+  currSlice->long_term_pic_idx_l1 = NULL;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Tian Dong
+ *    June 13, 2002, Modified on July 30, 2003
+ *
+ *    If a gap in frame_num is found, try to fill the gap.
+ *    One placeholder frame (non_existing = 1) is stored in the DPB for
+ *    every frame number from pre_frame_num + 1 (modulo MaxFrameNum) up
+ *    to, but not including, the current frame_num.
+ *
+ * \param p_Vid
+ *    decoder state. pre_frame_num is advanced to the last filled frame
+ *    number; frame_num and delta_pic_order_cnt[0..1] are changed while
+ *    filling and restored to their entry values before returning.
+ *
+ ************************************************************************
+ */
+// TODO: benski> pass in timecode
+void fill_frame_num_gap(VideoParameters *p_Vid)
+{
+  seq_parameter_set_rbsp_t *sps = p_Vid->active_sps;
+
+  /* values that must survive the fill loop */
+  int saved_delta_poc0 = p_Vid->delta_pic_order_cnt[0];
+  int saved_delta_poc1 = p_Vid->delta_pic_order_cnt[1];
+  int target_frame_num = p_Vid->frame_num;
+  int gap_frame_num;
+
+  /* the POC deltas are zeroed for the duration of the fill */
+  p_Vid->delta_pic_order_cnt[1] = 0;
+  p_Vid->delta_pic_order_cnt[0] = 0;
+
+  for (gap_frame_num = (p_Vid->pre_frame_num + 1) % p_Vid->MaxFrameNum;
+       target_frame_num != gap_frame_num;
+       gap_frame_num = (gap_frame_num + 1) % p_Vid->MaxFrameNum)
+  {
+    StorablePicture *filler = alloc_storable_picture (p_Vid, FRAME, p_Vid->width, p_Vid->height, p_Vid->width_cr, p_Vid->height_cr);
+
+    /* mark the frame as a non-existing short-term reference */
+    filler->coded_frame = 1;
+    filler->pic_num = gap_frame_num;
+    filler->frame_num = gap_frame_num;
+    filler->non_existing = 1;
+    filler->is_output = 1;
+    filler->used_for_reference = 1;
+    filler->adaptive_ref_pic_buffering_flag = 0;
+
+    /* decode_poc() is only invoked for POC types other than 0; it runs
+       with frame_num set to the frame number being filled */
+    p_Vid->frame_num = gap_frame_num;
+    if (sps->pic_order_cnt_type != 0)
+      decode_poc(p_Vid);
+
+    filler->top_poc = p_Vid->toppoc;
+    filler->bottom_poc = p_Vid->bottompoc;
+    filler->frame_poc = p_Vid->framepoc;
+    filler->poc = p_Vid->framepoc;
+
+    store_picture_in_dpb(p_Vid, filler);
+
+    p_Vid->pre_frame_num = gap_frame_num;
+  }
+
+  /* put back what was overwritten above */
+  p_Vid->delta_pic_order_cnt[0] = saved_delta_poc0;
+  p_Vid->delta_pic_order_cnt[1] = saved_delta_poc1;
+  p_Vid->frame_num = target_frame_num;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate motion parameter memory for colocated structure
+ *
+ * \param p_Vid
+ *    decoder state holding the motion cache
+ * \param ftype
+ *    motion parameter set to fill: motion[0], motion[1] and moving_block
+ * \param size_y
+ *    number of rows of the motion arrays
+ * \param size_x
+ *    number of columns of the motion arrays
+ *
+ * \note
+ *    moving_block is allocated with both dimensions rounded up to the
+ *    next even value (ROUNDUP2).
+ ************************************************************************
+ */
+#define ROUNDUP2(size) (((size)+1) & ~1)
+void alloc_motion_params(VideoParameters *p_Vid, MotionParams *ftype, int size_y, int size_x)
+{
+  int list;
+
+  /* Step 1: take both arrays from the motion cache when its dimensions
+     match the request; otherwise start from empty pointers. */
+  if (motion_cache_dimensions_match(&p_Vid->motion_cache, size_x, size_y))
+  {
+    for (list = 0; list < 2; list++)
+      ftype->motion[list] = motion_cache_get(&p_Vid->motion_cache);
+  }
+  else
+  {
+    for (list = 0; list < 2; list++)
+      ftype->motion[list] = NULL;
+  }
+
+  /* Step 2: allocate whatever the cache did not deliver. */
+  for (list = 0; list < 2; list++)
+  {
+    if (!ftype->motion[list])
+      get_mem2DPicMotion(&(ftype->motion[list]), size_y, size_x);
+  }
+
+  get_mem2D (&(ftype->moving_block), ROUNDUP2(size_y), ROUNDUP2(size_x));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate co-located memory
+ *
+ * \param p_Vid
+ *    decoder state, forwarded to alloc_motion_params()
+ * \param size_x
+ *    horizontal luma size
+ * \param size_y
+ *    vertical luma size
+ * \param mb_adaptive_frame_field_flag
+ *    flag that indicates macroblock adaptive frame/field coding; when
+ *    set, separate top and bottom motion parameters with half the
+ *    number of rows are allocated in addition to the frame ones
+ *
+ * \return
+ *    the allocated ColocatedParams structure
+ ************************************************************************
+ */
+ColocatedParams* alloc_colocated(VideoParameters *p_Vid, int size_x, int size_y, int mb_adaptive_frame_field_flag)
+{
+  ColocatedParams *coloc = calloc(1, sizeof(ColocatedParams));
+
+  if (coloc == NULL)
+    no_mem_exit("alloc_colocated: s");
+
+  coloc->size_x = size_x;
+  coloc->size_y = size_y;
+  coloc->mb_adaptive_frame_field_flag = mb_adaptive_frame_field_flag;
+
+  /* frame motion: one entry per BLOCK_SIZE x BLOCK_SIZE luma block */
+  alloc_motion_params(p_Vid, &coloc->frame, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+
+  if (mb_adaptive_frame_field_flag)
+  {
+    /* field motion: same width, half as many rows */
+    alloc_motion_params(p_Vid, &coloc->top   , size_y / (BLOCK_SIZE * 2), size_x / BLOCK_SIZE);
+    alloc_motion_params(p_Vid, &coloc->bottom, size_y / (BLOCK_SIZE * 2), size_x / BLOCK_SIZE);
+  }
+
+  return coloc;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free co-located memory.
+ *
+ * \param p_Vid
+ *    decoder state holding the motion cache
+ * \param p
+ *    Co-located parameter structure to be freed; NULL is accepted and
+ *    ignored
+ *
+ * \note
+ *    Only the frame motion arrays are handed back to the motion cache
+ *    (when the cache dimensions match); top/bottom arrays are always
+ *    freed.
+ *    NOTE(review): the cache check uses p_Vid->width/height rather than
+ *    p->size_x/size_y - confirm both always agree at this point.
+ ************************************************************************
+ */
+void free_colocated(VideoParameters *p_Vid, ColocatedParams* p)
+{
+  if (!p)
+    return;
+
+  /* frame motion arrays: recycle through the cache or release */
+  if (!motion_cache_dimensions_match(&p_Vid->motion_cache, p_Vid->width / BLOCK_SIZE, p_Vid->height / BLOCK_SIZE))
+  {
+    free_mem2DPicMotion(p->frame.motion[0]);
+    free_mem2DPicMotion(p->frame.motion[1]);
+  }
+  else
+  {
+    motion_cache_add(&p_Vid->motion_cache,p->frame.motion[0]);
+    motion_cache_add(&p_Vid->motion_cache,p->frame.motion[1]);
+  }
+
+  if (p->frame.moving_block)
+  {
+    free_mem2D (p->frame.moving_block);
+    p->frame.moving_block = NULL;
+  }
+
+  /* field data exists only when the structure was allocated for MBAFF */
+  if (p->mb_adaptive_frame_field_flag)
+  {
+    free_mem2DPicMotion(p->top.motion[0]);
+    free_mem2DPicMotion(p->top.motion[1]);
+    if (p->top.moving_block)
+    {
+      free_mem2D (p->top.moving_block);
+      p->top.moving_block = NULL;
+    }
+
+    free_mem2DPicMotion(p->bottom.motion[0]);
+    free_mem2DPicMotion(p->bottom.motion[1]);
+    if (p->bottom.moving_block)
+    {
+      free_mem2D (p->bottom.moving_block);
+      p->bottom.moving_block = NULL;
+    }
+  }
+
+  free(p);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * Compute co-located motion info
+ *
+ * The source picture is listX[LIST_1][0]; results go to p->frame and
+ * p->is_long_term, and (temporal direct only) to currSlice->mvscale.
+ *
+ * - direct_8x8_inference_flag set, spatial direct: only moving_block is
+ * written, one decision per 2x2 group of entries.
+ * - direct_8x8_inference_flag set, temporal direct: the source motion is
+ * copied and then every entry is replaced by the entry at
+ * [RSD(j)][RSD(i)].
+ * - direct_8x8_inference_flag clear: the source motion is copied as is;
+ * for spatial direct moving_block is computed per entry.
+ * - temporal direct (direct_spatial_mv_pred_flag == 0): mvscale[0][i] is
+ * computed for every entry i of list 0.
+ *
+ * RSD() is a macro defined outside this function.
+ *
+ * \param currSlice
+ * current slice (supplies p_Vid, direct_spatial_mv_pred_flag, mvscale)
+ * \param p
+ * co-located parameters to fill
+ * \param listX
+ * reference picture lists
+ ************************************************************************
+ */
+void compute_colocated (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6])
+{
+ StorablePicture *fs = listX[LIST_1 ][0]; // co-located source: first picture of list 1
+ int i,j, ii, jj;
+ int fs_size_x4 = (fs->size_x >> 2); // size_x / 4: bound for column index i
+ int fs_size_y4 = (fs->size_y >> 2); // size_y / 4: bound for row index j
+ MotionParams *p_motion = &p->frame; // destination
+ PicMotionParams *p_frm_motion = &fs->motion; // source
+ VideoParameters *p_Vid = currSlice->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+ p->is_long_term = fs->is_long_term;
+
+ if (active_sps->direct_8x8_inference_flag)
+ {
+ // Spatial Direct
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ if (p->is_long_term) // long-term source: every block is marked as moving
+ {
+ for (j=0 ; j < fs_size_y4; j++)
+ {
+ for (i=0 ; i < fs_size_x4; i++)
+ {
+ p_motion->moving_block[j][i]= (byte) 1;
+ }
+ }
+ }
+ else
+ {
+ PicMotion **motion0 = p_frm_motion->motion[LIST_0];
+ PicMotion **motion1 = p_frm_motion->motion[LIST_1];
+ for (j=0 ; j < fs_size_y4; j+=2) // two rows per iteration
+ {
+ PicMotion *ref0, *ref1;
+ byte *moving_block = p_motion->moving_block[j];
+ byte *moving_block2 = p_motion->moving_block[j+1];
+ jj = RSD(j);
+ ref0 = motion0[jj];
+ ref1 = motion1[jj];
+ for (i=0 ; i < fs_size_x4; i+=2) // two columns per iteration
+ {
+ PicMotion *r0;
+ ii = RSD(i);
+ r0 = &ref0[ii];
+ if (((r0->ref_idx == 0) && (iabs(r0->mv[0])>>1 == 0) && (iabs(r0->mv[1])>>1 == 0))) // list 0: ref_idx 0 and both |mv| components <= 1
+ {
+ moving_block[i]=0;
+ moving_block[i+1]=0;
+ moving_block2[i]=0;
+ moving_block2[i+1]=0;
+ }
+ else if (r0->ref_idx == -1) // list 0 ref_idx is -1: apply the same test to list 1
+ {
+ PicMotion *r1 = &ref1[ii];
+ if ((r1->ref_idx == 0) && (iabs(r1->mv[0])>>1 == 0) && (iabs(r1->mv[1])>>1 == 0))
+ {
+ moving_block[i]=0;
+ moving_block[i+1]=0;
+ moving_block2[i]=0;
+ moving_block2[i+1]=0;
+ }
+ else
+ {
+ moving_block[i]=1;
+ moving_block[i+1]=1;
+ moving_block2[i]=1;
+ moving_block2[i+1]=1;
+ }
+ }
+ else
+ {
+ moving_block[i]=1;
+ moving_block[i+1]=1;
+ moving_block2[i]=1;
+ moving_block2[i+1]=1;
+ }
+ /*
+ p_motion->moving_block[j][i]= (byte) (
+ !(((ref0[ii].ref_idx == 0)
+ && (iabs(ref0[ii].mv[0])>>1 == 0)
+ && (iabs(ref0[ii].mv[1])>>1 == 0)))
+ || ((ref0[ii].ref_idx == -1)
+ && (ref1[ii].ref_idx == 0)
+ && (iabs(ref1[ii].mv[0])>>1 == 0)
+ && (iabs(ref1[ii].mv[1])>>1 == 0))));
+ */
+ }
+ }
+ }
+ }
+ else
+ {
+ int k;
+
+ for (k = LIST_0; k<=LIST_1; k++) // first pass: plain copy of mv, ref_idx and ref_id for both lists
+ {
+ for (j = 0; j < fs_size_y4; j++)
+ {
+ for (i=0;i<fs_size_x4;i++)
+ {
+ PicMotion *src = &p_frm_motion->motion[k][j][i];
+ PicMotion *dest = &p_motion->motion[k][j][i];
+ memcpy(&dest->mv, &src->mv, sizeof(MotionVector));
+ dest->ref_idx = src->ref_idx;
+ dest->ref_pic_id = src->ref_id;
+ }
+ }
+ }
+
+ // temporal direct
+ for (j=0 ; j < fs_size_y4; j++) // second pass: in-place replacement by the [RSD(j)][RSD(i)] entry
+ {
+ jj = RSD(j);
+ for (i=0 ; i < fs_size_x4; i++)
+ {
+ ii = RSD(i);
+
+ p_motion->motion[LIST_0][j][i] = p_motion->motion[LIST_0][jj][ii];
+ p_motion->motion[LIST_1][j][i] = p_motion->motion[LIST_1][jj][ii];
+ //p_motion->mv[LIST_0][j][i].mv[0] = p_motion->mv[LIST_0][jj][ii][0];
+ //p_motion->mv[LIST_0][j][i].mv[1] = p_motion->mv[LIST_0][jj][ii][1];
+ //p_motion->mv[LIST_1][j][i].mv[0] = p_motion->mv[LIST_1][jj][ii][0];
+ //p_motion->mv[LIST_1][j][i][1] = p_motion->mv[LIST_1][jj][ii][1];
+
+ //p_motion->ref_idx[LIST_0][j][i] = p_motion->ref_idx[LIST_0][jj][ii];
+ //p_motion->ref_idx[LIST_1][j][i] = p_motion->ref_idx[LIST_1][jj][ii];
+ //p_motion->ref_pic_id[LIST_0][j][i] = p_motion->ref_pic_id[LIST_0][jj][ii];
+ //p_motion->ref_pic_id[LIST_1][j][i] = p_motion->ref_pic_id[LIST_1][jj][ii];
+ }
+ }
+ }
+ }
+ else
+ {
+ for (j=0 ; j < fs_size_y4; j++) // no 8x8 inference: copy every entry unchanged
+ {
+ for (i=0 ; i < fs_size_x4; i++)
+ {
+ memcpy(&p_motion->motion[LIST_0][j][i].mv, &p_frm_motion->motion[LIST_0][j][i].mv, sizeof(MotionVector));
+ p_motion->motion[LIST_0][j][i].ref_idx= p_frm_motion->motion[LIST_0][j][i].ref_idx;
+ p_motion->motion[LIST_0][j][i].ref_pic_id= p_frm_motion->motion[LIST_0][j][i].ref_id;
+ memcpy(&p_motion->motion[LIST_1][j][i].mv, &p_frm_motion->motion[LIST_1][j][i].mv, sizeof(MotionVector));
+ p_motion->motion[LIST_1][j][i].ref_idx= p_frm_motion->motion[LIST_1][j][i].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_pic_id= p_frm_motion->motion[LIST_1][j][i].ref_id;
+ }
+ }
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ if (p->is_long_term) // long-term source: every block is marked as moving
+ {
+ for (j=0 ; j < fs_size_y4; j++)
+ {
+ for (i=0 ; i < fs_size_x4; i++)
+ {
+
+ p_motion->moving_block[j][i]= 1;
+ }
+ }
+ }
+ else
+ {
+ for (j=0 ; j < fs_size_y4; j++)
+ {
+ for (i=0 ; i < fs_size_x4; i++)
+ {
+ p_motion->moving_block[j][i]= // 0 when the list 0 entry (or list 1 if list 0 ref_idx is -1) has ref_idx 0 and |mv| <= 1 in both components, else 1
+ !((((p_motion->motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p_motion->motion[LIST_0][j][i].ref_idx == -1)
+ && (p_motion->motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+ }
+ }
+ }
+ }
+
+ if (currSlice->direct_spatial_mv_pred_flag == 0) // temporal direct: compute the mv scale factors
+ {
+ for (j = 0; j < 2;j += 2) // executes for j == 0 only, so the j == 2 and else branches below are not reached here
+ {
+ for (i=0; i<p_Vid->listXsize[j];i++)
+ {
+ int prescale, iTRb, iTRp;
+
+ if (j==0)
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->poc - listX[LIST_0 + j][i]->poc ); // POC distance: current picture to list 0 entry i
+ }
+ else if (j == 2)
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->top_poc - listX[LIST_0 + j][i]->poc );
+ }
+ else
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->bottom_poc - listX[LIST_0 + j][i]->poc );
+ }
+
+ iTRp = iClip3( -128, 127, listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc); // POC distance: co-located picture to list 0 entry i
+
+ if (iTRp!=0)
+ {
+ prescale = ( 16384 + iabs( iTRp / 2 ) ) / iTRp;
+ currSlice->mvscale[j][i] = iClip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ;
+ }
+ else
+ {
+ currSlice->mvscale[j][i] = 9999; // iTRp == 0: no division possible, 9999 is stored instead
+ }
+ }
+ }
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * Compute co-located motion info (for interlace support)
+ *
+ * The source picture fs is listX[LIST_1][0]. With mb_aff_frame_flag set,
+ * fs_top / fs_bottom come from listX[LIST_1 + 2][0] and
+ * listX[LIST_1 + 4][0]. For a field picture whose structure differs from
+ * a frame-coded fs, fs is replaced by its top_field or bottom_field.
+ *
+ * The function then runs several passes over p->frame (and p->top /
+ * p->bottom for MBAFF); later passes overwrite results of earlier ones:
+ * 1. copy of the source motion (non-MBAFF) or per-entry selection between
+ * frame, top-field and bottom-field motion (MBAFF)
+ * 2. "Generate field MVs from Frame MVs" when p_Vid->structure is
+ * non-zero or MBAFF is used
+ * 3. remapping for a frame picture with field-coded co-located blocks
+ * (only when frame_mbs_only_flag is 0 and p_Vid->structure is 0)
+ * 4. p->is_long_term is set from fs
+ * 5. replacement by the [RSD(j)][RSD(i)] entry (plus moving_block for
+ * spatial direct), or a plain copy when neither interlace nor 8x8
+ * inference applies
+ * 6. temporal direct only: vertical mv scaling and mvscale computation
+ *
+ * RSD() is a macro defined outside this function.
+ *
+ * \param currSlice
+ * current slice (supplies p_Vid, mb_aff_frame_flag,
+ * direct_spatial_mv_pred_flag, mvscale)
+ * \param p
+ * co-located parameters to fill
+ * \param listX
+ * reference picture lists
+ ************************************************************************
+ */
+void compute_colocated_frames_mbs (Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6])
+{
+ StorablePicture *fs = listX[LIST_1 ][0]; // co-located source: first picture of list 1
+ StorablePicture *fs_top = fs, *fs_bottom = fs;
+ int i,j, ii, jj, jdiv;
+ int fs_size_x4 = (fs->size_x >> 2); // taken from the initial fs; not recomputed if fs is replaced below
+ int fs_size_y4 = (fs->size_y >> 2);
+ MotionParams *p_motion = &p->frame;
+ PicMotionParams *p_frm_motion = &fs->motion;
+ VideoParameters *p_Vid = currSlice->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+
+ if (currSlice->mb_aff_frame_flag)
+ {
+ fs_top = listX[LIST_1 + 2][0];
+ fs_bottom = listX[LIST_1 + 4][0];
+ }
+ else
+ {
+ if (p_Vid->field_pic_flag)
+ {
+ if ((p_Vid->structure != fs->structure) && (fs->coded_frame)) // field picture with a frame-coded co-located picture: use its matching field
+ {
+ if (p_Vid->structure==TOP_FIELD)
+ {
+ fs_top = fs_bottom = fs = listX[LIST_1 ][0]->top_field;
+ }
+ else
+ {
+ fs_top = fs_bottom = fs = listX[LIST_1 ][0]->bottom_field;
+ }
+ }
+ p_frm_motion = &fs->motion; // re-point the source at the (possibly replaced) fs
+ }
+ }
+
+ if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+ {
+ if (!currSlice->mb_aff_frame_flag)
+ {
+ int k;
+
+ for (k = LIST_0; k<=LIST_1; k++) // non-MBAFF: plain copy of both lists
+ {
+ for (j = 0; j < (fs->size_y>>2); j++)
+ {
+ for (i=0;i<fs_size_x4;i++)
+ {
+
+ memcpy(&p_motion->motion[k][j][i].mv, &p_frm_motion->motion[k][j][i].mv, sizeof(MotionVector));
+ p_motion->motion[k][j][i].ref_idx=p_frm_motion->motion[k][j][i].ref_idx;
+ p_motion->motion[k][j][i].ref_pic_id=p_frm_motion->motion[k][j][i].ref_id;
+ }
+ }
+ }
+ p->is_long_term = fs->is_long_term;
+ }
+ else
+ {
+ for (j=0 ; j < (fs->size_y>>2); j++)
+ {
+ jdiv = (j>>1); // row index used for the field motion arrays
+ jj = jdiv + ((j>>3)<<2); // row index used for ref_id in the frame motion array
+ for (i=0 ; i < fs_size_x4 ; i++)
+ {
+ if (p_frm_motion->field_frame[j][i])
+ {
+ //! Assign frame buffers for field MBs
+ //! Check whether we should use top or bottom field mvs.
+ //! Depending on the assigned poc values.
+
+ if (iabs(p_Vid->dec_picture->poc - fs_bottom->poc)> iabs(p_Vid->dec_picture->poc -fs_top->poc) ) // top field is closer in POC
+ {
+ p_motion->motion[LIST_0][j][i].mv[0] = fs_top->motion.motion[LIST_0][jdiv][i].mv[0];
+ p_motion->motion[LIST_0][j][i].mv[1] = fs_top->motion.motion[LIST_0][jdiv][i].mv[1];
+ p_motion->motion[LIST_1][j][i].mv[0] = fs_top->motion.motion[LIST_1][jdiv][i].mv[0];
+ p_motion->motion[LIST_1][j][i].mv[1] = fs_top->motion.motion[LIST_1][jdiv][i].mv[1];
+ p_motion->motion[LIST_0][j][i].ref_idx = fs_top->motion.motion[LIST_0][jdiv][i].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_idx = fs_top->motion.motion[LIST_1][jdiv][i].ref_idx;
+ //p_motion->mv[LIST_0][j][i][0] = fs_top->motion.motion[LIST_0][jdiv][i].mv[0];
+ //p_motion->mv[LIST_0][j][i][1] = fs_top->motion.motion[LIST_0][jdiv][i].mv[1] ;
+ //p_motion->mv[LIST_1][j][i][0] = fs_top->motion.motion[LIST_1][jdiv][i].mv[0];
+ //p_motion->mv[LIST_1][j][i][1] = fs_top->motion.motion[LIST_1][jdiv][i].mv[1] ;
+ //p_motion->ref_idx[LIST_0][j][i] = fs_top->motion.motion[LIST_0][jdiv][i].ref_idx;
+ //p_motion->ref_idx[LIST_1][j][i] = fs_top->motion.motion[LIST_1][jdiv][i].ref_idx;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj][i].ref_id;
+ p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj][i].ref_id;
+
+ p->is_long_term = fs_top->is_long_term;
+ }
+ else
+ {
+ p_motion->motion[LIST_0][j][i].mv[0] = fs_bottom->motion.motion[LIST_0][jdiv][i].mv[0];
+ p_motion->motion[LIST_0][j][i].mv[1] = fs_bottom->motion.motion[LIST_0][jdiv][i].mv[1];
+ p_motion->motion[LIST_1][j][i].mv[0] = fs_bottom->motion.motion[LIST_1][jdiv][i].mv[0];
+ p_motion->motion[LIST_1][j][i].mv[1] = fs_bottom->motion.motion[LIST_1][jdiv][i].mv[1];
+ p_motion->motion[LIST_0][j][i].ref_idx = fs_bottom->motion.motion[LIST_0][jdiv][i].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_idx = fs_bottom->motion.motion[LIST_1][jdiv][i].ref_idx;
+ //p_motion->mv[LIST_0][j][i][0] = fs_bottom->motion.motion[LIST_0][jdiv][i].mv[0];
+ //p_motion->mv[LIST_0][j][i][1] = fs_bottom->motion.motion[LIST_0][jdiv][i].mv[1] ;
+ //p_motion->mv[LIST_1][j][i][0] = fs_bottom->motion.motion[LIST_1][jdiv][i].mv[0];
+ //p_motion->mv[LIST_1][j][i][1] = fs_bottom->motion.motion[LIST_1][jdiv][i].mv[1] ;
+ //p_motion->ref_idx[LIST_0][j][i] = fs_bottom->motion.motion[LIST_0][jdiv][i].ref_idx;
+ //p_motion->ref_idx[LIST_1][j][i] = fs_bottom->motion.motion[LIST_1][jdiv][i].ref_idx;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj + 4][i].ref_id;
+ p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj + 4][i].ref_id;
+
+ p->is_long_term = fs_bottom->is_long_term;
+ }
+ }
+ else
+ {
+ p_motion->motion[LIST_0][j][i].mv[0] = p_frm_motion->motion[LIST_0][j][i].mv[0];
+ p_motion->motion[LIST_0][j][i].mv[1] = p_frm_motion->motion[LIST_0][j][i].mv[1] ;
+ p_motion->motion[LIST_1][j][i].mv[0] = p_frm_motion->motion[LIST_1][j][i].mv[0];
+ p_motion->motion[LIST_1][j][i].mv[1] = p_frm_motion->motion[LIST_1][j][i].mv[1] ;
+ p_motion->motion[LIST_0][j][i].ref_idx = p_frm_motion->motion[LIST_0][j][i].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_idx = p_frm_motion->motion[LIST_1][j][i].ref_idx;
+ //p_motion->mv[LIST_0][j][i][0] = p_frm_motion->motion[LIST_0][j][i].mv[0];
+ //p_motion->mv[LIST_0][j][i][1] = p_frm_motion->motion[LIST_0][j][i].mv[1] ;
+ //p_motion->mv[LIST_1][j][i][0] = p_frm_motion->motion[LIST_1][j][i].mv[0];
+ //p_motion->mv[LIST_1][j][i][1] = p_frm_motion->motion[LIST_1][j][i].mv[1] ;
+ //p_motion->ref_idx[LIST_0][j][i] = p_frm_motion->motion[LIST_0][j][i].ref_idx;
+ //p_motion->ref_idx[LIST_1][j][i] = p_frm_motion->motion[LIST_1][j][i].ref_idx;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][j][i].ref_id;
+ p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][j][i].ref_id;
+
+ p->is_long_term = fs->is_long_term;
+ }
+ }
+ }
+ }
+ }
+
+ //! Generate field MVs from Frame MVs
+ if (p_Vid->structure || currSlice->mb_aff_frame_flag)
+ {
+ for (j = 0; j < fs->size_y >> 3; j++) // size_y / 8 rows
+ {
+ jj = RSD(j);
+ for (i = 0 ; i < fs->size_x >> 2; i++)
+ {
+ ii = RSD(i);
+ //! Do nothing if macroblock as field coded in MB-AFF
+ if (!currSlice->mb_aff_frame_flag )
+ {
+ p_motion->motion[LIST_0][j][i].mv[0] = p_frm_motion->motion[LIST_0][jj][ii].mv[0];
+ p_motion->motion[LIST_0][j][i].mv[1] = p_frm_motion->motion[LIST_0][jj][ii].mv[1];
+ p_motion->motion[LIST_1][j][i].mv[0] = p_frm_motion->motion[LIST_1][jj][ii].mv[0];
+ p_motion->motion[LIST_1][j][i].mv[1] = p_frm_motion->motion[LIST_1][jj][ii].mv[1];
+
+ // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
+
+ if (p_frm_motion->motion[LIST_0][jj][ii].ref_idx == -1)
+ {
+ p_motion->motion[LIST_0][j][i].ref_idx = -1;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ?
+ }
+ else
+ {
+ p_motion->motion[LIST_0][j][i].ref_idx = p_frm_motion->motion[LIST_0][jj][ii].ref_idx ;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion [LIST_0][jj][ii].ref_id;
+ }
+
+ if (p_frm_motion->motion[LIST_1][jj][ii].ref_idx == -1)
+ {
+ p_motion->motion[LIST_1][j][i].ref_idx = -1;
+ p_motion->motion[LIST_1][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ?
+ }
+ else
+ {
+ p_motion->motion[LIST_1][j][i].ref_idx = p_frm_motion->motion[LIST_1][jj][ii].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion [LIST_1][jj][ii].ref_id;
+ }
+
+ p->is_long_term = fs->is_long_term;
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p_motion->moving_block[j][i] = // 0 only for a near-zero mv with ref_idx 0 (list 0 requires a non-long-term source), else 1
+ !((!p->is_long_term
+ && ((p_motion->motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p_motion->motion[LIST_0][j][i].ref_idx == -1)
+ && (p_motion->motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+ }
+ else
+ {
+ p->bottom.motion[LIST_0][j][i].mv[0] = fs_bottom->motion.motion[LIST_0][jj][ii].mv[0]; // MBAFF: fill p->bottom from fs_bottom ...
+ p->bottom.motion[LIST_0][j][i].mv[1] = fs_bottom->motion.motion[LIST_0][jj][ii].mv[1];
+ p->bottom.motion[LIST_1][j][i].mv[0] = fs_bottom->motion.motion[LIST_1][jj][ii].mv[0];
+ p->bottom.motion[LIST_1][j][i].mv[1] = fs_bottom->motion.motion[LIST_1][jj][ii].mv[1];
+ p->bottom.motion[LIST_0][j][i].ref_idx = fs_bottom->motion.motion[LIST_0][jj][ii].ref_idx;
+ p->bottom.motion[LIST_1][j][i].ref_idx = fs_bottom->motion.motion[LIST_1][jj][ii].ref_idx;
+ p->bottom.motion[LIST_0][j][i].ref_pic_id = fs_bottom->motion.motion[LIST_0][jj][ii].ref_id;
+ p->bottom.motion[LIST_1][j][i].ref_pic_id = fs_bottom->motion.motion[LIST_1][jj][ii].ref_id;
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p->bottom.moving_block[j][i] =
+ !((!fs_bottom->is_long_term
+ && ((p->bottom.motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p->bottom.motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p->bottom.motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p->bottom.motion[LIST_0][j][i].ref_idx == -1)
+ && (p->bottom.motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p->bottom.motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p->bottom.motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+
+ p->top.motion[LIST_0][j][i].mv[0] = fs_top->motion.motion[LIST_0][jj][ii].mv[0]; // ... and p->top from fs_top
+ p->top.motion[LIST_0][j][i].mv[1] = fs_top->motion.motion[LIST_0][jj][ii].mv[1];
+ p->top.motion[LIST_1][j][i].mv[0] = fs_top->motion.motion[LIST_1][jj][ii].mv[0];
+ p->top.motion[LIST_1][j][i].mv[1] = fs_top->motion.motion[LIST_1][jj][ii].mv[1];
+ p->top.motion[LIST_0][j][i].ref_idx = fs_top->motion.motion[LIST_0][jj][ii].ref_idx;
+ p->top.motion[LIST_1][j][i].ref_idx = fs_top->motion.motion[LIST_1][jj][ii].ref_idx;
+ p->top.motion[LIST_0][j][i].ref_pic_id = fs_top->motion.motion[LIST_0][jj][ii].ref_id;
+ p->top.motion[LIST_1][j][i].ref_pic_id = fs_top->motion.motion[LIST_1][jj][ii].ref_id;
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p->top.moving_block[j][i] =
+ !((!fs_top->is_long_term
+ && ((p->top.motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p->top.motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p->top.motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p->top.motion[LIST_0][j][i].ref_idx == -1)
+ && (p->top.motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p->top.motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p->top.motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+
+ if ((currSlice->direct_spatial_mv_pred_flag == 0 ) && !p_frm_motion->field_frame[2*j][i]) // temporal direct, frame-coded source block: halve the vertical mv for field use
+ {
+ p->top.motion[LIST_0][j][i].mv[1] /= 2;
+ p->top.motion[LIST_1][j][i].mv[1] /= 2;
+ p->bottom.motion[LIST_0][j][i].mv[1] /= 2;
+ p->bottom.motion[LIST_1][j][i].mv[1] /= 2;
+ }
+ }
+ }
+ }
+ }
+
+ //if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+ if (!active_sps->frame_mbs_only_flag)
+ {
+ //! Use inference flag to remap mvs/references
+ //! Frame with field co-located
+ if (!p_Vid->structure)
+ {
+ for (j=0 ; j < fs_size_y4; j++)
+ {
+ jdiv = (j >> 1);
+ jj = jdiv + ((j >> 3) << 2);
+ for (i = 0 ; i < fs_size_x4; i++)
+ {
+ if (p_frm_motion->field_frame[j][i])
+ {
+ if (iabs(p_Vid->dec_picture->poc - fs->bottom_field->poc) > iabs(p_Vid->dec_picture->poc - fs->top_field->poc)) // top field is closer in POC
+ {
+ p_motion->motion[LIST_0][j][i].mv[0] = fs->top_field->motion.motion[LIST_0][jdiv][i].mv[0];
+ p_motion->motion[LIST_0][j][i].mv[1] = fs->top_field->motion.motion[LIST_0][jdiv][i].mv[1] ;
+ p_motion->motion[LIST_1][j][i].mv[0] = fs->top_field->motion.motion[LIST_1][jdiv][i].mv[0];
+ p_motion->motion[LIST_1][j][i].mv[1] = fs->top_field->motion.motion[LIST_1][jdiv][i].mv[1] ;
+
+ p_motion->motion[LIST_0][j][i].ref_idx = fs->top_field->motion.motion[LIST_0][jdiv][i].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_idx = fs->top_field->motion.motion[LIST_1][jdiv][i].ref_idx;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj][i].ref_id;
+ p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj][i].ref_id;
+ p->is_long_term = fs->top_field->is_long_term;
+ }
+ else
+ {
+ p_motion->motion[LIST_0][j][i].mv[0] = fs->bottom_field->motion.motion[LIST_0][jdiv][i].mv[0];
+ p_motion->motion[LIST_0][j][i].mv[1] = fs->bottom_field->motion.motion[LIST_0][jdiv][i].mv[1] ;
+ p_motion->motion[LIST_1][j][i].mv[0] = fs->bottom_field->motion.motion[LIST_1][jdiv][i].mv[0];
+ p_motion->motion[LIST_1][j][i].mv[1] = fs->bottom_field->motion.motion[LIST_1][jdiv][i].mv[1] ;
+
+ p_motion->motion[LIST_0][j][i].ref_idx = fs->bottom_field->motion.motion[LIST_0][jdiv][i].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_idx = fs->bottom_field->motion.motion[LIST_1][jdiv][i].ref_idx;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion[LIST_0][jj + 4][i].ref_id;
+ p_motion->motion[LIST_1][j][i].ref_pic_id = p_frm_motion->motion[LIST_1][jj + 4][i].ref_id;
+ p->is_long_term = fs->bottom_field->is_long_term;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ p->is_long_term = fs->is_long_term; // unconditional: replaces any is_long_term value assigned in the loops above
+
+ if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+ {
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ for (j=0 ; j < (fs->size_y>>2); j++)
+ {
+ jj = RSD(j);
+ for (i=0 ; i < (fs->size_x>>2); i++)
+ {
+ ii = RSD(i);
+
+ p_motion->motion[LIST_0][j][i] = p_motion->motion[LIST_0][jj][ii]; // in-place replacement by the [RSD(j)][RSD(i)] entry
+ p_motion->motion[LIST_1][j][i] = p_motion->motion[LIST_1][jj][ii];
+ //p_motion->motion[LIST_0][j][i].mv[0]=p_motion->motion[LIST_0][jj][ii].mv[0];
+ //p_motion->motion[LIST_0][j][i].mv[1]=p_motion->motion[LIST_0][jj][ii].mv[1];
+ //p_motion->motion[LIST_1][j][i].mv[0]=p_motion->motion[LIST_1][jj][ii].mv[0];
+ //p_motion->motion[LIST_1][j][i].mv[1]=p_motion->motion[LIST_1][jj][ii].mv[1];
+
+ //p_motion->motion[LIST_0][j][i].ref_idx=p_motion->motion[LIST_0][jj][ii].ref_idx;
+ //p_motion->motion[LIST_1][j][i].ref_idx=p_motion->motion[LIST_1][jj][ii].ref_idx;
+ //p_motion->motion[LIST_0][j][i].ref_pic_id = p_motion->motion[LIST_0][jj][ii].ref_pic_id;
+ //p_motion->motion[LIST_1][j][i].ref_pic_id = p_motion->motion[LIST_1][jj][ii].ref_pic_id;
+
+ p_motion->moving_block[j][i]= (byte) (
+ !((!p->is_long_term
+ && ((p_motion->motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p_motion->motion[LIST_0][j][i].ref_idx == -1)
+ && (p_motion->motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0))));
+ }
+ }
+ }
+ else
+ {
+ for (j=0 ; j < (fs->size_y>>2); j++)
+ {
+ jj = RSD(j);
+ for (i=0 ; i < (fs->size_x>>2); i++)
+ {
+ ii = RSD(i);
+ p_motion->motion[LIST_0][j][i]=p_motion->motion[LIST_0][jj][ii]; // temporal direct: same replacement, no moving_block
+ p_motion->motion[LIST_1][j][i]=p_motion->motion[LIST_1][jj][ii];
+
+ //p_motion->mv[LIST_0][j][i][0]=p_motion->mv[LIST_0][jj][ii][0];
+ //p_motion->mv[LIST_0][j][i][1]=p_motion->mv[LIST_0][jj][ii][1];
+ //p_motion->mv[LIST_1][j][i][0]=p_motion->mv[LIST_1][jj][ii][0];
+ //p_motion->mv[LIST_1][j][i][1]=p_motion->mv[LIST_1][jj][ii][1];
+
+ //p_motion->ref_idx[LIST_0][j][i]=p_motion->ref_idx[LIST_0][jj][ii];
+ //p_motion->ref_idx[LIST_1][j][i]=p_motion->ref_idx[LIST_1][jj][ii];
+ //p_motion->ref_pic_id[LIST_0][j][i] = p_motion->ref_pic_id[LIST_0][jj][ii];
+ //p_motion->ref_pic_id[LIST_1][j][i] = p_motion->ref_pic_id[LIST_1][jj][ii];
+ }
+ }
+ }
+ }
+ else
+ {
+ for (j=0 ; j < fs_size_y4; j++) // frame_mbs_only without 8x8 inference: copy every entry unchanged
+ {
+ for (i=0 ; i < fs_size_x4; i++)
+ {
+ memcpy(&p_motion->motion[LIST_0][j][i].mv, &p_frm_motion->motion[LIST_0][j][i].mv, sizeof(MotionVector));
+ p_motion->motion[LIST_0][j][i].ref_idx= p_frm_motion->motion[LIST_0][j][i].ref_idx;
+ p_motion->motion[LIST_0][j][i].ref_pic_id = p_frm_motion->motion [LIST_0][j][i].ref_id;
+ memcpy(&p_motion->motion[LIST_1][j][i].mv, &p_frm_motion->motion[LIST_1][j][i].mv, sizeof(MotionVector));
+ p_motion->motion[LIST_1][j][i].ref_idx= p_frm_motion->motion[LIST_1][j][i].ref_idx;
+ p_motion->motion[LIST_1][j][i].ref_pic_id= p_frm_motion->motion [LIST_1][j][i].ref_id;
+ }
+ }
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ for (j=0 ; j < fs_size_y4; j++)
+ {
+ for (i=0 ; i < fs_size_x4; i++)
+ {
+ p_motion->moving_block[j][i]=
+ !((!p->is_long_term
+ && ((p_motion->motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p_motion->motion[LIST_0][j][i].ref_idx == -1)
+ && (p_motion->motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p_motion->motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+ }
+ }
+ }
+
+ if (currSlice->direct_spatial_mv_pred_flag == 0) // temporal direct only
+ {
+ if (currSlice->mb_aff_frame_flag || !p_Vid->structure)
+ {
+ for (j=0 ; j < fs_size_y4; j++)
+ {
+ for (i=0 ; i < fs_size_x4 ; i++)
+ {
+ if (p_frm_motion->field_frame[j][i]) // field-coded source block: double the vertical mv
+ {
+ p_motion->motion[LIST_0][j][i].mv[1] *= 2;
+ p_motion->motion[LIST_1][j][i].mv[1] *= 2;
+ }
+ }
+ }
+ }
+ else if (p_Vid->structure)
+ {
+ for (j=0 ; j < fs_size_y4; j++)
+ {
+ for (i=0 ; i < fs_size_x4 ; i++)
+ {
+ if (!p_frm_motion->field_frame[j][i]) // frame-coded source block: halve the vertical mv
+ {
+ p_motion->motion[LIST_0][j][i].mv[1] /= 2;
+ p_motion->motion[LIST_1][j][i].mv[1] /= 2;
+ }
+ }
+ }
+ }
+
+ for (j=0; j<2 + (currSlice->mb_aff_frame_flag * 4);j+=2) // j = 0 only, or j = 0, 2, 4 when mb_aff_frame_flag is 1
+ {
+ for (i=0; i<p_Vid->listXsize[j];i++)
+ {
+ int prescale, iTRb, iTRp;
+
+ if (j==0)
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->poc - listX[LIST_0 + j][i]->poc );
+ }
+ else if (j == 2)
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->top_poc - listX[LIST_0 + j][i]->poc );
+ }
+ else
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->bottom_poc - listX[LIST_0 + j][i]->poc );
+ }
+
+ iTRp = iClip3( -128, 127, listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc); // POC distance: co-located picture to list entry i
+
+ if (iTRp!=0)
+ {
+ prescale = ( 16384 + iabs( iTRp / 2 ) ) / iTRp;
+ currSlice->mvscale[j][i] = iClip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ;
+ }
+ else
+ {
+ currSlice->mvscale[j][i] = 9999; // iTRp == 0: no division possible, 9999 is stored instead
+ }
+ }
+ }
+ }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ * Compute co-located motion info
+ * for 4:4:4 Independent mode
+ *
+ ************************************************************************
+ */
+
+void compute_colocated_JV(Slice *currSlice, ColocatedParams* p, StorablePicture **listX[6])
+{
+ VideoParameters *p_Vid = currSlice->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ StorablePicture *fs, *fs_top, *fs_bottom;
+ int i,j, ii, jj, jdiv;
+ int np = p_Vid->colour_plane_id;
+
+ fs_top = fs_bottom = fs = listX[LIST_1 ][0];
+
+ if (currSlice->mb_aff_frame_flag)
+ {
+ fs_top= listX[LIST_1 + 2][0];
+ fs_bottom= listX[LIST_1 + 4][0];
+ }
+ else
+ {
+ if (p_Vid->field_pic_flag)
+ {
+ if ((p_Vid->structure != fs->structure) && (fs->coded_frame))
+ {
+ if (p_Vid->structure==TOP_FIELD)
+ {
+ fs_top=fs_bottom=fs = listX[LIST_1 ][0]->top_field;
+ }
+ else
+ {
+ fs_top=fs_bottom=fs = listX[LIST_1 ][0]->bottom_field;
+ }
+ }
+ }
+ }
+
+ if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+ {
+ for (j=0 ; j<fs->size_y/4 ; j++)
+ {
+ jdiv = j/2;
+ jj = j/2 + 4 * (j/8);
+ for (i=0 ; i<fs->size_x/4 ; i++)
+ {
+
+ if (currSlice->mb_aff_frame_flag && fs->motion.field_frame[j][i])
+ {
+ //! Assign frame buffers for field MBs
+ //! Check whether we should use top or bottom field mvs.
+ //! Depending on the assigned poc values.
+
+ if (iabs(p_Vid->dec_picture->poc - fs_bottom->poc)> iabs(p_Vid->dec_picture->poc -fs_top->poc) )
+ {
+ p->frame.motion[LIST_0][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_0][jdiv][i].mv[0];
+ p->frame.motion[LIST_0][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ;
+ p->frame.motion[LIST_1][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_1][jdiv][i].mv[0];
+ p->frame.motion[LIST_1][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ;
+ p->frame.motion[LIST_0][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx;
+ p->frame.motion[LIST_1][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj][i].ref_id;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj][i].ref_id;
+
+ p->is_long_term = fs_top->is_long_term;
+ }
+ else
+ {
+ p->frame.motion[LIST_0][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_0][jdiv][i].mv[0];
+ p->frame.motion[LIST_0][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ;
+ p->frame.motion[LIST_1][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_1][jdiv][i].mv[0];
+ p->frame.motion[LIST_1][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ;
+ p->frame.motion[LIST_0][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx;
+ p->frame.motion[LIST_1][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj + 4][i].ref_id;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj + 4][i].ref_id;
+
+ p->is_long_term = fs_bottom->is_long_term;
+ }
+ }
+ else
+ {
+ p->frame.motion[LIST_0][j][i].mv[0] = fs->JVmotion[np].motion[LIST_0][j][i].mv[0];
+ p->frame.motion[LIST_0][j][i].mv[1] = fs->JVmotion[np].motion[LIST_0][j][i].mv[1] ;
+ p->frame.motion[LIST_1][j][i].mv[0] = fs->JVmotion[np].motion[LIST_1][j][i].mv[0];
+ p->frame.motion[LIST_1][j][i].mv[1] = fs->JVmotion[np].motion[LIST_1][j][i].mv[1] ;
+ p->frame.motion[LIST_0][j][i].ref_idx = fs->JVmotion[np].motion[LIST_0][j][i].ref_idx;
+ p->frame.motion[LIST_1][j][i].ref_idx = fs->JVmotion[np].motion[LIST_1][j][i].ref_idx;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][j][i].ref_id;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][j][i].ref_id;
+
+ p->is_long_term = fs->is_long_term;
+ }
+ }
+ }
+ }
+
+
+ //! Generate field MVs from Frame MVs
+ if (p_Vid->structure || currSlice->mb_aff_frame_flag)
+ {
+ for (j=0 ; j<fs->size_y/8 ; j++)
+ {
+ jj = RSD(j);
+ for (i=0 ; i<fs->size_x/4 ; i++)
+ {
+ ii = RSD(i);
+ //! Do nothing if macroblock as field coded in MB-AFF
+ if (!currSlice->mb_aff_frame_flag )
+ {
+ p->frame.motion[LIST_0][j][i].mv[0] = fs->JVmotion[np].motion[LIST_0][jj][ii].mv[0];
+ p->frame.motion[LIST_0][j][i].mv[1] = fs->JVmotion[np].motion[LIST_0][jj][ii].mv[1];
+ p->frame.motion[LIST_1][j][i].mv[0] = fs->JVmotion[np].motion[LIST_1][jj][ii].mv[0];
+ p->frame.motion[LIST_1][j][i].mv[1] = fs->JVmotion[np].motion[LIST_1][jj][ii].mv[1];
+
+ // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
+
+ if (fs->JVmotion[np].motion[LIST_0][jj][ii].ref_idx == -1)
+ {
+ p->frame.motion[LIST_0][j][i].ref_idx = -1;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ?
+ }
+ else
+ {
+ p->frame.motion[LIST_0][j][i].ref_idx = fs->JVmotion[np].motion[LIST_0][jj][ii].ref_idx;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj][ii].ref_id;
+ }
+
+ if (fs->JVmotion[np].motion[LIST_1][jj][ii].ref_idx == -1)
+ {
+ p->frame.motion[LIST_1][j][i].ref_idx = -1;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = UNDEFINED_REFERENCE; // TODO: UNDEFINED_REFERENCE ?
+ }
+ else
+ {
+ p->frame.motion[LIST_1][j][i].ref_idx = fs->JVmotion[np].motion[LIST_1][jj][ii].ref_idx;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj][ii].ref_id;
+ }
+
+ p->is_long_term = fs->is_long_term;
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p->frame.moving_block[j][i] =
+ !((!p->is_long_term
+ && ((p->frame.motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p->frame.motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p->frame.motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p->frame.motion[LIST_0][j][i].ref_idx == -1)
+ && (p->frame.motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p->frame.motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p->frame.motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+ }
+ else
+ {
+ p->bottom.motion[LIST_0][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].mv[0];
+ p->bottom.motion[LIST_0][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].mv[1];
+ p->bottom.motion[LIST_1][j][i].mv[0] = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].mv[0];
+ p->bottom.motion[LIST_1][j][i].mv[1] = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].mv[1];
+ p->bottom.motion[LIST_0][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].ref_idx;
+ p->bottom.motion[LIST_1][j][i].ref_idx = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].ref_idx;
+ p->bottom.motion[LIST_0][j][i].ref_pic_id = fs_bottom->JVmotion[np].motion[LIST_0][jj][ii].ref_id;
+ p->bottom.motion[LIST_1][j][i].ref_pic_id = fs_bottom->JVmotion[np].motion[LIST_1][jj][ii].ref_id;
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p->bottom.moving_block[j][i] =
+ !((!fs_bottom->is_long_term
+ && ((p->bottom.motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p->bottom.motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p->bottom.motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p->bottom.motion[LIST_0][j][i].ref_idx == -1)
+ && (p->bottom.motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p->bottom.motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p->bottom.motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+
+ p->top.motion[LIST_0][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_0][jj][ii].mv[0];
+ p->top.motion[LIST_0][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_0][jj][ii].mv[1];
+ p->top.motion[LIST_1][j][i].mv[0] = fs_top->JVmotion[np].motion[LIST_1][jj][ii].mv[0];
+ p->top.motion[LIST_1][j][i].mv[1] = fs_top->JVmotion[np].motion[LIST_1][jj][ii].mv[1];
+ p->top.motion[LIST_0][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_0][jj][ii].ref_idx;
+ p->top.motion[LIST_1][j][i].ref_idx = fs_top->JVmotion[np].motion[LIST_1][jj][ii].ref_idx;
+ p->top.motion[LIST_0][j][i].ref_pic_id = fs_top->JVmotion[np].motion[LIST_0][jj][ii].ref_id;
+ p->top.motion[LIST_1][j][i].ref_pic_id = fs_top->JVmotion[np].motion[LIST_1][jj][ii].ref_id;
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p->top.moving_block[j][i] =
+ !((!fs_top->is_long_term
+ && ((p->top.motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p->top.motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p->top.motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p->top.motion[LIST_0][j][i].ref_idx == -1)
+ && (p->top.motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p->top.motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p->top.motion[LIST_1][j][i].mv[1])>>1 == 0)));
+ }
+
+ if ((currSlice->direct_spatial_mv_pred_flag == 0 ) && !fs->motion.field_frame[2*j][i])
+ {
+ p->top.motion[LIST_0][j][i].mv[1] /= 2;
+ p->top.motion[LIST_1][j][i].mv[1] /= 2;
+ p->bottom.motion[LIST_0][j][i].mv[1] /= 2;
+ p->bottom.motion[LIST_1][j][i].mv[1] /= 2;
+ }
+
+ }
+ }
+ }
+ }
+
+
+ if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+ {
+ //! Use inference flag to remap mvs/references
+ //! Frame with field co-located
+
+ if (!p_Vid->structure)
+ {
+ for (j=0 ; j<fs->size_y/4 ; j++)
+ {
+ jdiv = j/2;
+ jj = j/2 + 4*(j/8);
+ for (i=0 ; i<fs->size_x/4 ; i++)
+ {
+
+ if (fs->motion.field_frame[j][i])
+ {
+ if (iabs(p_Vid->dec_picture->poc - fs->bottom_field->poc) > iabs(p_Vid->dec_picture->poc - fs->top_field->poc))
+ {
+ p->frame.motion[LIST_0][j][i].mv[0] = fs->top_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[0];
+ p->frame.motion[LIST_0][j][i].mv[1] = fs->top_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ;
+ p->frame.motion[LIST_1][j][i].mv[0] = fs->top_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[0];
+ p->frame.motion[LIST_1][j][i].mv[1] = fs->top_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ;
+
+ p->frame.motion[LIST_0][j][i].ref_idx = fs->top_field->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx;
+ p->frame.motion[LIST_1][j][i].ref_idx = fs->top_field->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj][i].ref_id;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj][i].ref_id;
+ p->is_long_term = fs->top_field->is_long_term;
+ }
+ else
+ {
+ p->frame.motion[LIST_0][j][i].mv[0] = fs->bottom_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[0];
+ p->frame.motion[LIST_0][j][i].mv[1] = fs->bottom_field->JVmotion[np].motion[LIST_0][jdiv][i].mv[1] ;
+ p->frame.motion[LIST_1][j][i].mv[0] = fs->bottom_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[0];
+ p->frame.motion[LIST_1][j][i].mv[1] = fs->bottom_field->JVmotion[np].motion[LIST_1][jdiv][i].mv[1] ;
+
+ p->frame.motion[LIST_0][j][i].ref_idx = fs->bottom_field->JVmotion[np].motion[LIST_0][jdiv][i].ref_idx;
+ p->frame.motion[LIST_1][j][i].ref_idx = fs->bottom_field->JVmotion[np].motion[LIST_1][jdiv][i].ref_idx;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][jj + 4][i].ref_id;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][jj + 4][i].ref_id;
+ p->is_long_term = fs->bottom_field->is_long_term;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ p->is_long_term = fs->is_long_term;
+
+ if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+ {
+ for (j=0 ; j<fs->size_y/4 ; j++)
+ {
+ jj = RSD(j);
+ for (i=0 ; i<fs->size_x/4 ; i++)
+ {
+ ii = RSD(i);
+
+ p->frame.motion[LIST_0][j][i] = p->frame.motion[LIST_0][jj][ii];
+ p->frame.motion[LIST_1][j][i] = p->frame.motion[LIST_1][jj][ii];
+ //p->frame.mv[LIST_0][j][i][0] = p->frame.mv[LIST_0][jj][ii][0];
+ //p->frame.mv[LIST_0][j][i][1] = p->frame.mv[LIST_0][jj][ii][1];
+ //p->frame.mv[LIST_1][j][i][0] = p->frame.mv[LIST_1][jj][ii][0];
+ //p->frame.mv[LIST_1][j][i][1] = p->frame.mv[LIST_1][jj][ii][1];
+
+ //p->frame.ref_idx[LIST_0][j][i]=p->frame.ref_idx[LIST_0][jj][ii];
+ //p->frame.ref_idx[LIST_1][j][i]=p->frame.ref_idx[LIST_1][jj][ii];
+ //p->frame.ref_pic_id[LIST_0][j][i] = p->frame.ref_pic_id[LIST_0][jj][ii];
+ //p->frame.ref_pic_id[LIST_1][j][i] = p->frame.ref_pic_id[LIST_1][jj][ii];
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p->frame.moving_block[j][i]= (byte) (
+ !((!p->is_long_term
+ && ((p->frame.motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p->frame.motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p->frame.motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p->frame.motion[LIST_0][j][i].ref_idx == -1)
+ && (p->frame.motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p->frame.motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p->frame.motion[LIST_1][j][i].mv[1])>>1 == 0))));
+ }
+ }
+ }
+ }
+ else
+ {
+ for (j=0 ; j<fs->size_y/4 ; j++)
+ {
+ jj = RSD(j);
+ for (i=0 ; i<fs->size_x/4 ; i++)
+ {
+ ii = RSD(i);
+ //! Use inference flag to remap mvs/references
+ p->frame.motion[LIST_0][j][i].mv[0] = fs->JVmotion[np].motion[LIST_0][j][i].mv[0];
+ p->frame.motion[LIST_0][j][i].mv[1] = fs->JVmotion[np].motion[LIST_0][j][i].mv[1];
+ p->frame.motion[LIST_1][j][i].mv[0] = fs->JVmotion[np].motion[LIST_1][j][i].mv[0];
+ p->frame.motion[LIST_1][j][i].mv[1] = fs->JVmotion[np].motion[LIST_1][j][i].mv[1];
+
+ p->frame.motion[LIST_0][j][i].ref_idx = fs->JVmotion[np].motion[LIST_0][j][i].ref_idx;
+ p->frame.motion[LIST_1][j][i].ref_idx = fs->JVmotion[np].motion[LIST_1][j][i].ref_idx;
+ p->frame.motion[LIST_0][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_0][j][i].ref_id;
+ p->frame.motion[LIST_1][j][i].ref_pic_id = fs->JVmotion[np].motion[LIST_1][j][i].ref_id;
+
+ if (currSlice->direct_spatial_mv_pred_flag == 1)
+ {
+ p->frame.moving_block[j][i]= (byte) (
+ !((!p->is_long_term
+ && ((p->frame.motion[LIST_0][j][i].ref_idx == 0)
+ && (iabs(p->frame.motion[LIST_0][j][i].mv[0])>>1 == 0)
+ && (iabs(p->frame.motion[LIST_0][j][i].mv[1])>>1 == 0)))
+ || ((p->frame.motion[LIST_0][j][i].ref_idx == -1)
+ && (p->frame.motion[LIST_1][j][i].ref_idx == 0)
+ && (iabs(p->frame.motion[LIST_1][j][i].mv[0])>>1 == 0)
+ && (iabs(p->frame.motion[LIST_1][j][i].mv[1])>>1 == 0))));
+ }
+ }
+ }
+ }
+
+
+ if (currSlice->direct_spatial_mv_pred_flag == 0)
+ {
+ for (j=0 ; j<fs->size_y/4 ; j++)
+ {
+ for (i=0 ; i<fs->size_x/4 ; i++)
+ {
+ if ((!currSlice->mb_aff_frame_flag &&!p_Vid->structure && fs->motion.field_frame[j][i]) || (currSlice->mb_aff_frame_flag && fs->motion.field_frame[j][i]))
+ {
+ p->frame.motion[LIST_0][j][i].mv[1] *= 2;
+ p->frame.motion[LIST_1][j][i].mv[1] *= 2;
+ }
+ else if (p_Vid->structure && !fs->motion.field_frame[j][i])
+ {
+ p->frame.motion[LIST_0][j][i].mv[1] /= 2;
+ p->frame.motion[LIST_1][j][i].mv[1] /= 2;
+ }
+
+ }
+ }
+
+ for (j=0; j<2 + (currSlice->mb_aff_frame_flag * 4);j+=2)
+ {
+ for (i=0; i<p_Vid->listXsize[j];i++)
+ {
+ int prescale, iTRb, iTRp;
+
+ if (j==0)
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->poc - listX[LIST_0 + j][i]->poc );
+ }
+ else if (j == 2)
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->top_poc - listX[LIST_0 + j][i]->poc );
+ }
+ else
+ {
+ iTRb = iClip3( -128, 127, p_Vid->dec_picture->bottom_poc - listX[LIST_0 + j][i]->poc );
+ }
+
+ iTRp = iClip3( -128, 127, listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc);
+
+ if (iTRp!=0)
+ {
+ prescale = ( 16384 + iabs( iTRp / 2 ) ) / iTRp;
+ currSlice->mvscale[j][i] = iClip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ;
+ }
+ else
+ {
+ currSlice->mvscale[j][i] = 9999;
+ }
+ }
+ }
+ }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Copy picture motion parameters from one store into another.
+*
+* \param p_Vid
+*    video parameters; height and width determine how many PicMotion
+*    entries are copied (one per BLOCK_SIZE x BLOCK_SIZE unit)
+* \param JVplane
+*    destination motion parameters
+* \param motion
+*    source motion parameters
+************************************************************************
+*/
+void copy_storable_param_JV( VideoParameters *p_Vid, PicMotionParams *JVplane, PicMotionParams *motion )
+{
+  const int md_size = (p_Vid->height / BLOCK_SIZE) * (p_Vid->width / BLOCK_SIZE);
+
+  memcpy(JVplane->motion, motion->motion, md_size * sizeof(PicMotion));
+
+  // field_references is optional on the source side.
+  // NOTE(review): the destination buffer is assumed to exist whenever the
+  // source one does -- confirm against the allocation code.
+  if (motion->field_references)
+    memcpy(JVplane->field_references, motion->field_references, 4 * md_size * sizeof(h264_ref_t));
+}
diff --git a/Src/h264dec/ldecod/src/mc_prediction.c b/Src/h264dec/ldecod/src/mc_prediction.c
new file mode 100644
index 00000000..8af3aae8
--- /dev/null
+++ b/Src/h264dec/ldecod/src/mc_prediction.c
@@ -0,0 +1,2420 @@
+
+/*!
+*************************************************************************************
+* \file mc_prediction.c
+*
+* \brief
+* Functions for motion compensated prediction
+*
+* \author
+* Main contributors (see contributors.h for copyright,
+* address and affiliation details)
+* - Alexis Michael Tourapis <alexismt@ieee.org>
+*
+*************************************************************************************
+*/
+#include "global.h"
+#include "block.h"
+#include "mc_prediction.h"
+#include "mbuffer.h"
+#include "mb_access.h"
+#include "macroblock.h"
+#include "memalloc.h"
+#include "optim.h"
+#include <emmintrin.h>
+
+static const int COEF[6] = { 1, -5, 20, 20, -5, 1 }; /* symmetric 6-tap kernel, taps sum to 32; presumably the half-sample luma interpolation filter -- confirm at use sites */
+/*!
+************************************************************************
+* \brief
+*    Copy a single-list prediction block into the macroblock prediction
+*    buffer, starting at row joff / column ioff.
+*
+* \param mb_pred         destination prediction buffer
+* \param joff            first destination row
+* \param ver_block_size  number of rows to copy
+* \param hor_block_size  number of samples copied per row
+* \param ioff            first destination column
+* \param block           source block, read from its top-left sample
+************************************************************************
+*/
+static inline void mc_prediction(h264_imgpel_macroblock_t mb_pred,
+                                 int joff,
+                                 int ver_block_size,
+                                 int hor_block_size,
+                                 int ioff,
+                                 const h264_imgpel_macroblock_t block)
+{
+  h264_imgpel_macroblock_row_t *dst_rows;
+  int row;
+
+  // Full-width block: all requested rows are moved with one copy.
+  if (hor_block_size == MB_BLOCK_SIZE)
+  {
+    memcpy(&(mb_pred[joff][ioff]), &(block[0][0]), hor_block_size * ver_block_size * sizeof(imgpel));
+    return;
+  }
+
+  // Narrower block: copy row by row so the destination columns outside
+  // [ioff, ioff + hor_block_size) stay untouched.
+  dst_rows = (h264_imgpel_macroblock_row_t *)(mb_pred[joff]);
+  for (row = 0; row < ver_block_size; ++row)
+  {
+    memcpy(&dst_rows[row][ioff], &(block[row][0]), hor_block_size * sizeof(imgpel));
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Single-list weighted prediction, applied in place to the top-left
+*    hor_block_size x ver_block_size region of mb_pred.
+*
+*    With weight_denom > 0 each sample is scaled, passed through
+*    rshift_rnd_pos() and offset; otherwise rshift_rnd_nonpos() is used
+*    with the negated denominator. The result goes through
+*    iClip1(255, ...) before being stored.
+************************************************************************
+*/
+static inline void weighted_mc_prediction(h264_imgpel_macroblock_row_t *mb_pred,
+                                          int ver_block_size,
+                                          int hor_block_size,
+                                          int wp_scale,
+                                          int wp_offset,
+                                          int weight_denom)
+{
+#ifdef H264_IPP
+  IppiSize roi = {hor_block_size, ver_block_size};
+  ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, roi);
+#else
+  int x, y;
+
+  if (weight_denom > 0)
+  {
+    for (y = 0; y < ver_block_size; y++)
+    {
+      imgpel *line = mb_pred[y];
+
+      for (x = 0; x < hor_block_size; x++)
+        line[x] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * line[x]), weight_denom) + wp_offset ));
+    }
+  }
+  else
+  {
+    const int neg_denom = -weight_denom;
+
+    for (y = 0; y < ver_block_size; y++)
+    {
+      imgpel *line = mb_pred[y];
+
+      for (x = 0; x < hor_block_size; x++)
+        line[x] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * line[x]), neg_denom) + wp_offset ));
+    }
+  }
+#endif
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    SSE2 single-list weighted prediction for a 16x16 block, in place.
+*
+*    All arithmetic is done in 16-bit lanes. For weight_denom > 0:
+*      out = sat8(((pix * scale + (1 << (denom-1))) >> denom) + offset)
+*    otherwise:
+*      out = sat8(((pix * scale) << -denom) + offset)
+*    where sat8 is the unsigned saturation performed by the final pack.
+*    Rows are accessed with aligned 128-bit loads/stores.
+************************************************************************
+*/
+void weighted_mc_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  const __m128i zero   = _mm_setzero_si128();
+  const __m128i scale  = _mm_set1_epi16(wp_scale);
+  const __m128i offset = _mm_set1_epi16(wp_offset);
+  int row;
+
+  if (weight_denom > 0)
+  {
+    const __m128i shift    = _mm_cvtsi32_si128(weight_denom);
+    const __m128i rounding = _mm_set1_epi16((1<<(weight_denom-1)));
+
+    for (row = 0; row < 16; row++)
+    {
+      __m128i *line = (__m128i *)mb_pred[row];
+      const __m128i pix = _mm_load_si128(line);
+      __m128i lo = _mm_unpacklo_epi8(pix, zero);   // samples 0..7 widened to 16 bit
+      __m128i hi = _mm_unpackhi_epi8(pix, zero);   // samples 8..15 widened to 16 bit
+
+      lo = _mm_add_epi16(_mm_mullo_epi16(lo, scale), rounding);
+      hi = _mm_add_epi16(_mm_mullo_epi16(hi, scale), rounding);
+      lo = _mm_add_epi16(_mm_sra_epi16(lo, shift), offset);
+      hi = _mm_add_epi16(_mm_sra_epi16(hi, shift), offset);
+
+      _mm_store_si128(line, _mm_packus_epi16(lo, hi));
+    }
+  }
+  else
+  {
+    const __m128i shift = _mm_cvtsi32_si128(-weight_denom);
+
+    for (row = 0; row < 16; row++)
+    {
+      __m128i *line = (__m128i *)mb_pred[row];
+      const __m128i pix = _mm_load_si128(line);
+      __m128i lo = _mm_unpacklo_epi8(pix, zero);
+      __m128i hi = _mm_unpackhi_epi8(pix, zero);
+
+      lo = _mm_sll_epi16(_mm_mullo_epi16(lo, scale), shift);
+      hi = _mm_sll_epi16(_mm_mullo_epi16(hi, scale), shift);
+      lo = _mm_add_epi16(lo, offset);
+      hi = _mm_add_epi16(hi, offset);
+
+      _mm_store_si128(line, _mm_packus_epi16(lo, hi));
+    }
+  }
+}
+
+#ifdef H264_IPP
+/*! 16x16 single-list weighted prediction, delegated to the IPP primitive (in place on mb_pred). */
+void weighted_mc_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  const IppiSize block_dims = {16, 16};
+
+  // the row stride handed to IPP is the size of one prediction-buffer row
+  ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, block_dims);
+}
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Plain C single-list weighted prediction for a 16x16 block, in place.
+*    weight_denom > 0 uses rshift_rnd_pos(); any other value uses
+*    rshift_rnd_nonpos() with the negated denominator.
+************************************************************************
+*/
+void weighted_mc_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  int col, line;
+
+  if (weight_denom > 0)
+  {
+    for (line = 0; line < 16; ++line)
+    {
+      imgpel *samples = mb_pred[line];
+
+      for (col = 0; col < 16; ++col)
+        samples[col] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * samples[col]), weight_denom) + wp_offset ));
+    }
+    return;
+  }
+
+  weight_denom = -weight_denom;
+  for (line = 0; line < 16; ++line)
+  {
+    imgpel *samples = mb_pred[line];
+
+    for (col = 0; col < 16; ++col)
+      samples[col] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * samples[col]), weight_denom) + wp_offset ));
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    SSE2 single-list weighted prediction for a 16 wide, 8 high block,
+*    in place. Arithmetic is done in 16-bit lanes and the final pack
+*    saturates to unsigned 8 bit. Rows are accessed with aligned
+*    128-bit loads/stores.
+************************************************************************
+*/
+void weighted_mc_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  const __m128i zero   = _mm_setzero_si128();
+  const __m128i scale  = _mm_set1_epi16(wp_scale);
+  const __m128i offset = _mm_set1_epi16(wp_offset);
+  int row;
+
+  if (weight_denom > 0)
+  {
+    // ((pix * scale + rounding) >> denom) + offset
+    const __m128i shift    = _mm_cvtsi32_si128(weight_denom);
+    const __m128i rounding = _mm_set1_epi16((1<<(weight_denom-1)));
+
+    for (row = 0; row < 8; row++)
+    {
+      __m128i *line = (__m128i *)mb_pred[row];
+      const __m128i pix = _mm_load_si128(line);
+      __m128i left  = _mm_unpacklo_epi8(pix, zero);
+      __m128i right = _mm_unpackhi_epi8(pix, zero);
+
+      left  = _mm_mullo_epi16(left, scale);
+      left  = _mm_add_epi16(left, rounding);
+      left  = _mm_sra_epi16(left, shift);
+      left  = _mm_add_epi16(left, offset);
+
+      right = _mm_mullo_epi16(right, scale);
+      right = _mm_add_epi16(right, rounding);
+      right = _mm_sra_epi16(right, shift);
+      right = _mm_add_epi16(right, offset);
+
+      _mm_store_si128(line, _mm_packus_epi16(left, right));
+    }
+  }
+  else
+  {
+    // ((pix * scale) << -denom) + offset
+    const __m128i shift = _mm_cvtsi32_si128(-weight_denom);
+
+    for (row = 0; row < 8; row++)
+    {
+      __m128i *line = (__m128i *)mb_pred[row];
+      const __m128i pix = _mm_load_si128(line);
+      __m128i left  = _mm_unpacklo_epi8(pix, zero);
+      __m128i right = _mm_unpackhi_epi8(pix, zero);
+
+      left  = _mm_mullo_epi16(left, scale);
+      left  = _mm_sll_epi16(left, shift);
+      left  = _mm_add_epi16(left, offset);
+
+      right = _mm_mullo_epi16(right, scale);
+      right = _mm_sll_epi16(right, shift);
+      right = _mm_add_epi16(right, offset);
+
+      _mm_store_si128(line, _mm_packus_epi16(left, right));
+    }
+  }
+}
+
+#ifdef H264_IPP
+/*! 16 wide, 8 high single-list weighted prediction, delegated to the IPP primitive (in place on mb_pred). */
+void weighted_mc_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  const IppiSize block_dims = {16, 8};
+
+  // the row stride handed to IPP is the size of one prediction-buffer row
+  ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, block_dims);
+}
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Plain C single-list weighted prediction for a 16 wide, 8 high
+*    block, in place. weight_denom > 0 uses rshift_rnd_pos(); any other
+*    value uses rshift_rnd_nonpos() with the negated denominator.
+************************************************************************
+*/
+void weighted_mc_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  int col, line;
+
+  if (weight_denom > 0)
+  {
+    for (line = 0; line < 8; ++line)
+    {
+      imgpel *samples = mb_pred[line];
+
+      for (col = 0; col < 16; ++col)
+        samples[col] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * samples[col]), weight_denom) + wp_offset ));
+    }
+    return;
+  }
+
+  weight_denom = -weight_denom;
+  for (line = 0; line < 8; ++line)
+  {
+    imgpel *samples = mb_pred[line];
+
+    for (col = 0; col < 16; ++col)
+      samples[col] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * samples[col]), weight_denom) + wp_offset ));
+  }
+}
+
+
+
+
+#define LOAD_LINE_EPI16(reg, ptr) { reg = _mm_loadl_epi64((__m128i *)(ptr)); reg = _mm_unpacklo_epi8(reg, xmm_zero); } /* loads 8 bytes from ptr and interleaves them with xmm_zero into eight 16-bit lanes of reg; needs an __m128i named xmm_zero in the caller's scope */
+/*!
+************************************************************************
+* \brief
+*    SSE2 single-list weighted prediction for an 8x8 block, in place.
+*    Each row is loaded as 8 bytes, widened to 16-bit lanes, weighted,
+*    and stored back with unsigned 8-bit saturation.
+************************************************************************
+*/
+void weighted_mc_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  const __m128i zero   = _mm_setzero_si128();
+  const __m128i scale  = _mm_set1_epi16(wp_scale);
+  const __m128i offset = _mm_set1_epi16(wp_offset);
+  int row;
+
+  if (weight_denom > 0)
+  {
+    // ((pix * scale + rounding) >> denom) + offset
+    const __m128i shift    = _mm_cvtsi32_si128(weight_denom);
+    const __m128i rounding = _mm_set1_epi16((1<<(weight_denom-1)));
+
+    for (row = 0; row < 8; row++)
+    {
+      __m128i *line = (__m128i *)mb_pred[row];
+      __m128i acc = _mm_unpacklo_epi8(_mm_loadl_epi64(line), zero);
+
+      acc = _mm_add_epi16(_mm_mullo_epi16(acc, scale), rounding);
+      acc = _mm_add_epi16(_mm_sra_epi16(acc, shift), offset);
+
+      _mm_storel_epi64(line, _mm_packus_epi16(acc, acc));
+    }
+  }
+  else
+  {
+    // ((pix * scale) << -denom) + offset
+    const __m128i shift = _mm_cvtsi32_si128(-weight_denom);
+
+    for (row = 0; row < 8; row++)
+    {
+      __m128i *line = (__m128i *)mb_pred[row];
+      __m128i acc = _mm_unpacklo_epi8(_mm_loadl_epi64(line), zero);
+
+      acc = _mm_sll_epi16(_mm_mullo_epi16(acc, scale), shift);
+      acc = _mm_add_epi16(acc, offset);
+
+      _mm_storel_epi64(line, _mm_packus_epi16(acc, acc));
+    }
+  }
+}
+
+#ifdef H264_IPP
+/*! 8x8 single-list weighted prediction, delegated to the IPP primitive (in place on mb_pred). */
+void weighted_mc_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  const IppiSize block_dims = {8, 8};
+
+  // the row stride handed to IPP is the size of one prediction-buffer row
+  ippiUniDirWeightBlock_H264_8u_C1IR(mb_pred[0], sizeof(mb_pred[0]), weight_denom, wp_scale, wp_offset, block_dims);
+}
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Plain C single-list weighted prediction for an 8x8 block, in place.
+*    weight_denom > 0 uses rshift_rnd_pos(); any other value uses
+*    rshift_rnd_nonpos() with the negated denominator.
+************************************************************************
+*/
+void weighted_mc_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, int wp_scale, int wp_offset, int weight_denom)
+{
+  int col, line;
+
+  if (weight_denom > 0)
+  {
+    for (line = 0; line < 8; ++line)
+    {
+      imgpel *samples = mb_pred[line];
+
+      for (col = 0; col < 8; ++col)
+        samples[col] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale * samples[col]), weight_denom) + wp_offset ));
+    }
+    return;
+  }
+
+  weight_denom = -weight_denom;
+  for (line = 0; line < 8; ++line)
+  {
+    imgpel *samples = mb_pred[line];
+
+    for (col = 0; col < 8; ++col)
+      samples[col] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale * samples[col]), weight_denom) + wp_offset ));
+  }
+}
+
+
+
+/*!
+************************************************************************
+* \brief
+*    Unweighted bi-prediction: combines block_l0 with the samples
+*    already held in mb_pred and writes the result back to mb_pred.
+*
+* \param mb_pred         second prediction on entry, combined result on exit
+* \param block_l0        first prediction
+* \param ver_block_size  number of rows to process
+* \param hor_block_size  number of samples per row to process
+************************************************************************
+*/
+static inline void bi_prediction(h264_imgpel_macroblock_row_t *mb_pred,
+                                 const h264_imgpel_macroblock_t block_l0,
+                                 int ver_block_size,
+                                 int hor_block_size)
+{
+#ifdef H264_IPP
+  ippiInterpolateBlock_H264_8u_P2P1R(block_l0[0], mb_pred[0], mb_pred[0], hor_block_size, ver_block_size, sizeof(mb_pred[0]));
+#else
+  int x, y;
+
+  for (y = 0; y < ver_block_size; y++)
+  {
+    const imgpel *first = block_l0[y];
+    imgpel *out = mb_pred[y];
+
+    // out[x] is read as the second prediction before it is overwritten
+    for (x = 0; x < hor_block_size; x++)
+      out[x] = (imgpel) rshift_rnd_sf((first[x] + out[x]), 1);
+  }
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    MMX unweighted bi-prediction for a 4x4 block:
+*    mb_pred = (block_l0 + mb_pred + 1) >> 1, computed in 16-bit lanes
+*    and stored back with unsigned 8-bit saturation.
+*
+*    The four samples of a row are moved to and from an int with
+*    memcpy instead of casting the row pointer to int *, which avoids
+*    accessing pixel storage through an incompatible type.
+*
+*    NOTE(review): no _mm_empty() is issued here; presumably the caller
+*    clears the MMX state before any x87 code runs -- confirm.
+************************************************************************
+*/
+static void bi_prediction4x4_mmx(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0)
+{
+  int jj;
+  __m64 b0, b1;
+  __m64 mmx_zero = _mm_setzero_si64();
+  __m64 mmx_one = _mm_set1_pi16(1);
+
+  for(jj = 0;jj < 4;jj++)
+  {
+    int packed_l0, packed_pred, packed_avg;
+
+    // fetch the first sizeof(int) bytes (four 8-bit samples) of each row
+    memcpy(&packed_l0, &block_l0[jj], sizeof(packed_l0));
+    memcpy(&packed_pred, &mb_pred[jj], sizeof(packed_pred));
+
+    b0 = _mm_cvtsi32_si64(packed_l0);
+    b0 = _mm_unpacklo_pi8(b0, mmx_zero);
+    b1 = _mm_cvtsi32_si64(packed_pred);
+    b1 = _mm_unpacklo_pi8(b1, mmx_zero);
+    b0 = _mm_add_pi16(b0, b1);
+    b0 = _mm_add_pi16(b0, mmx_one);
+    b0 = _mm_srai_pi16(b0, 1);
+    b0 = _mm_packs_pu16(b0, b0);
+
+    packed_avg = _mm_cvtsi64_si32(b0);
+    memcpy(&mb_pred[jj], &packed_avg, sizeof(packed_avg));
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    SSE2 unweighted bi-prediction for an 8x8 block:
+*    mb_pred = (block_l0 + mb_pred + 1) >> 1, computed in 16-bit lanes
+*    and stored back with unsigned 8-bit saturation.
+************************************************************************
+*/
+void bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0)
+{
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i one  = _mm_set1_epi16(1);
+  int row;
+
+  for (row = 0; row < 8; row++)
+  {
+    __m128i *dst = (__m128i *)mb_pred[row];
+    // widen 8 samples of each prediction to 16 bit
+    const __m128i first  = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)block_l0[row]), zero);
+    const __m128i second = _mm_unpacklo_epi8(_mm_loadl_epi64(dst), zero);
+    __m128i avg = _mm_add_epi16(_mm_add_epi16(first, second), one);
+
+    avg = _mm_srai_epi16(avg, 1);
+    _mm_storel_epi64(dst, _mm_packus_epi16(avg, avg));
+  }
+}
+
+#ifdef H264_IPP
+/*! 8x8 unweighted bi-prediction delegated to IPP; mb_pred is both the second source and the destination. */
+void bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0)
+{
+  // the stride argument is the size of one prediction-buffer row
+  ippiInterpolateBlock_H264_8u_P2P1R(block_l0[0], mb_pred[0], mb_pred[0], 8, 8, sizeof(mb_pred[0]));
+}
+#endif
+/*!
+************************************************************************
+* \brief
+*    Weighted bi-prediction: combines block_l0 (weighted by wp_scale_l0)
+*    with the samples already in mb_pred (weighted by wp_scale_l1) and
+*    writes the result back to mb_pred.
+*
+*    weight_denom > 0 uses rshift_rnd_pos(); any other value uses
+*    rshift_rnd_nonpos() with the negated denominator. wp_offset is
+*    added afterwards and the sum goes through iClip1(255, ...).
+************************************************************************
+*/
+static inline void weighted_bi_prediction(h264_imgpel_macroblock_row_t *mb_pred,
+                                          const h264_imgpel_macroblock_t block_l0,
+                                          int ver_block_size, int hor_block_size,
+                                          int wp_scale_l0, int wp_scale_l1,
+                                          int wp_offset, int weight_denom)
+{
+#ifdef H264_IPP
+  IppiSize roi = {hor_block_size, ver_block_size};
+  ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi);
+#else
+  int x, y;
+
+  if (weight_denom > 0)
+  {
+    for (y = 0; y < ver_block_size; y++)
+    {
+      const imgpel *first = block_l0[y];
+      imgpel *out = mb_pred[y];
+
+      for (x = 0; x < hor_block_size; x++)
+        out[x] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * first[x] + wp_scale_l1 * out[x]), weight_denom) + wp_offset));
+    }
+  }
+  else
+  {
+    const int neg_denom = -weight_denom;
+
+    for (y = 0; y < ver_block_size; y++)
+    {
+      const imgpel *first = block_l0[y];
+      imgpel *out = mb_pred[y];
+
+      for (x = 0; x < hor_block_size; x++)
+        out[x] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * first[x] + wp_scale_l1 * out[x]), neg_denom) + wp_offset));
+    }
+  }
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    SSE2 weighted bi-prediction for an 8x8 block. block_l0 is weighted
+*    by wp_scale_l0, the samples already in mb_pred by wp_scale_l1; the
+*    result replaces mb_pred. All arithmetic is done in 16-bit lanes and
+*    the final pack saturates to unsigned 8 bit.
+************************************************************************
+*/
+void weighted_bi_prediction8x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+  const __m128i zero     = _mm_setzero_si128();
+  const __m128i scale_l0 = _mm_set1_epi16(wp_scale_l0);
+  const __m128i scale_l1 = _mm_set1_epi16(wp_scale_l1);
+  const __m128i offset   = _mm_set1_epi16(wp_offset);
+  int row;
+
+  if (weight_denom > 0)
+  {
+    // ((l0 * w0 + l1 * w1 + rounding) >> denom) + offset
+    const __m128i shift    = _mm_cvtsi32_si128(weight_denom);
+    const __m128i rounding = _mm_set1_epi16((1<<(weight_denom-1)));
+
+    for (row = 0; row < 8; row++)
+    {
+      __m128i *dst = (__m128i *)mb_pred[row];
+      __m128i first  = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)block_l0[row]), zero);
+      __m128i second = _mm_unpacklo_epi8(_mm_loadl_epi64(dst), zero);
+      __m128i acc;
+
+      first  = _mm_mullo_epi16(first, scale_l0);
+      second = _mm_mullo_epi16(second, scale_l1);
+      acc = _mm_add_epi16(first, second);
+      acc = _mm_add_epi16(acc, rounding);
+      acc = _mm_sra_epi16(acc, shift);
+      acc = _mm_add_epi16(acc, offset);
+
+      _mm_storel_epi64(dst, _mm_packus_epi16(acc, acc));
+    }
+  }
+  else
+  {
+    // ((l0 * w0 + l1 * w1) << -denom) + offset
+    const __m128i shift = _mm_cvtsi32_si128(-weight_denom);
+
+    for (row = 0; row < 8; row++)
+    {
+      __m128i *dst = (__m128i *)mb_pred[row];
+      __m128i first  = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)block_l0[row]), zero);
+      __m128i second = _mm_unpacklo_epi8(_mm_loadl_epi64(dst), zero);
+      __m128i acc;
+
+      first  = _mm_mullo_epi16(first, scale_l0);
+      second = _mm_mullo_epi16(second, scale_l1);
+      acc = _mm_add_epi16(first, second);
+      acc = _mm_sll_epi16(acc, shift);
+      acc = _mm_add_epi16(acc, offset);
+
+      _mm_storel_epi64(dst, _mm_packus_epi16(acc, acc));
+    }
+  }
+}
+
+#ifdef H264_IPP
+/*! 8x8 weighted bi-prediction delegated to IPP; mb_pred is both the second source and the destination. */
+void weighted_bi_prediction8x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+  const IppiSize block_dims = {8, 8};
+
+  // the row stride handed to IPP is the size of one prediction-buffer row
+  ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, block_dims);
+}
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Plain C weighted bi-prediction for an 8x8 block. block_l0 is
+*    weighted by wp_scale_l0, the samples already in mb_pred by
+*    wp_scale_l1; the result replaces mb_pred.
+************************************************************************
+*/
+void weighted_bi_prediction8x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+  int col, line;
+
+  if (weight_denom > 0)
+  {
+    for (line = 0; line < 8; ++line)
+    {
+      const imgpel *first = block_l0[line];
+      imgpel *out = mb_pred[line];
+
+      for (col = 0; col < 8; ++col)
+        out[col] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * first[col] + wp_scale_l1 * out[col]), weight_denom) + wp_offset));
+    }
+    return;
+  }
+
+  weight_denom = -weight_denom;
+  for (line = 0; line < 8; ++line)
+  {
+    const imgpel *first = block_l0[line];
+    imgpel *out = mb_pred[line];
+
+    for (col = 0; col < 8; ++col)
+      out[col] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * first[col] + wp_scale_l1 * out[col]), weight_denom) + wp_offset));
+  }
+}
+
+#if defined(_DEBUG) || !defined(_M_IX86)
+/*!
+************************************************************************
+* \brief
+*    Weighted bi-prediction for a 4x4 block (C fallback used in debug
+*    builds and on non-x86 targets). block_l0 is weighted by
+*    wp_scale_l0, the samples already in mb_pred by wp_scale_l1; the
+*    result replaces mb_pred.
+*
+*    The weights and the offset are plain int, matching the extern
+*    prototype below and the other block sizes. With the former
+*    uint16_t parameters a negative weight or offset was converted to a
+*    large positive value before the integer multiply/add.
+************************************************************************
+*/
+static inline void weighted_bi_prediction4x4(h264_imgpel_macroblock_row_t *mb_pred,
+                                             const h264_imgpel_macroblock_t block_l0,
+                                             int wp_scale_l0,
+                                             int wp_scale_l1,
+                                             int wp_offset,
+                                             int weight_denom)
+{
+#ifdef H264_IPP
+  IppiSize roi = {4, 4};
+  ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi);
+#else
+  int ii, jj;
+
+  if (weight_denom > 0)
+  {
+    for(jj = 0; jj < 4; jj++)
+    {
+      const imgpel *b0 = block_l0[jj];
+      imgpel *row = mb_pred[jj];
+      const imgpel *b1 = row;   // second prediction is read from the destination row
+
+      for(ii=0;ii<4;ii++)
+        row[ii] = (imgpel) iClip1(255, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset));
+    }
+  }
+  else
+  {
+    weight_denom = -weight_denom;
+    for(jj = 0; jj < 4; jj++)
+    {
+      const imgpel *b0 = block_l0[jj];
+      imgpel *row = mb_pred[jj];
+      const imgpel *b1 = row;   // second prediction is read from the destination row
+
+      for(ii=0;ii<4;ii++)
+        row[ii] = (imgpel) iClip1(255, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset));
+    }
+  }
+#endif
+}
+#else
+extern void weighted_bi_prediction4x4(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom);
+#endif
+/*!
+************************************************************************
+* \brief
+* SSE2 weighted bi-prediction for 16 rows of 16 samples, in place:
+* each row of mb_pred is combined with the matching row of block_l0
+* as (wp_scale_l0 * block_l0 + wp_scale_l1 * mb_pred), shifted by
+* weight_denom, then wp_offset is added and the result is stored back
+* into mb_pred with unsigned 8-bit saturation.
+*
+* weight_denom > 0 : add (1 << (weight_denom-1)), arithmetic shift right.
+* otherwise : shift left by -weight_denom, no rounding term.
+*
+* Rows are accessed with _mm_load_si128 / _mm_store_si128, i.e. the
+* aligned-access intrinsics.
+*
+* NOTE(review): all products and sums stay in 16-bit lanes
+* (_mm_mullo_epi16 / _mm_add_epi16); confirm the weights passed by the
+* callers cannot overflow them.
+************************************************************************
+*/
+void weighted_bi_prediction16x16_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+ int jj;
+
+ __m128i xmm_zero = _mm_setzero_si128();
+ __m128i xmm_scale_l0 = _mm_set1_epi16(wp_scale_l0);
+ __m128i xmm_scale_l1 = _mm_set1_epi16(wp_scale_l1);
+ __m128i xmm_offset = _mm_set1_epi16(wp_offset);
+ if (weight_denom > 0)
+ {
+ __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom);
+ __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); // rounding term added before the right shift
+
+ for(jj = 0; jj < 16; jj++)
+ {
+ __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]);
+ __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]);
+
+ __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); // widen 8-bit samples to 16-bit lanes
+ __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero);
+ __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero);
+ __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero);
+ b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0);
+ b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1);
+ b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0);
+ b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1);
+ b0_low = _mm_add_epi16(b0_low, b1_low);
+ b0_high = _mm_add_epi16(b0_high, b1_high);
+ b0_low = _mm_add_epi16(b0_low, xmm_add);
+ b0_high = _mm_add_epi16(b0_high, xmm_add);
+ b0_low = _mm_sra_epi16(b0_low, xmm_shift);
+ b0_high = _mm_sra_epi16(b0_high, xmm_shift);
+ b0_low = _mm_add_epi16(b0_low, xmm_offset);
+ b0_high = _mm_add_epi16(b0_high, xmm_offset);
+
+ b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 (unsigned saturation)
+ _mm_store_si128((__m128i *)mb_pred[jj], b0_low);
+ // scalar form of the shift step: (x + (1 << (a-1) )) >> a;
+ // scalar form of the whole row: row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset));
+ }
+ }
+ else
+ {
+
+ __m128i xmm_shift = _mm_cvtsi32_si128(-weight_denom); // left-shift count for the non-positive case
+ for(jj = 0; jj < 16; jj++)
+ {
+ __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]);
+ __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]);
+
+ __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); // widen 8-bit samples to 16-bit lanes
+ __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero);
+ __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero);
+ __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero);
+ b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0);
+ b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1);
+ b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0);
+ b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1);
+ b0_low = _mm_add_epi16(b0_low, b1_low);
+ b0_high = _mm_add_epi16(b0_high, b1_high);
+ b0_low = _mm_sll_epi16(b0_low, xmm_shift);
+ b0_high = _mm_sll_epi16(b0_high, xmm_shift);
+ b0_low = _mm_add_epi16(b0_low, xmm_offset);
+ b0_high = _mm_add_epi16(b0_high, xmm_offset);
+
+ b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 (unsigned saturation)
+ _mm_store_si128((__m128i *)mb_pred[jj], b0_low);
+
+ // scalar form of the shift step: (x << a);
+ // scalar form of the whole row: row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset));
+ }
+ }
+}
+/*!
+************************************************************************
+* \brief
+* 16x16 weighted bi-prediction delegated to
+* ippiWeightedAverage_H264_8u_C1IR (only built when H264_IPP is defined).
+* block_l0 is passed as the first source, mb_pred as the in-place
+* second source / destination, and sizeof(mb_pred[0]) (the byte size of
+* one row of the macroblock buffer) as the step.
+************************************************************************
+*/
+#ifdef H264_IPP
+void weighted_bi_prediction16x16_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+ IppiSize roi = {16, 16};
+ ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi);
+}
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Plain C weighted bi-prediction for a 16x16 block, computed in place.
+*    Each sample of mb_pred is replaced by the clipped value of
+*    shift(wp_scale_l0 * block_l0 + wp_scale_l1 * mb_pred) + wp_offset.
+*    A positive weight_denom uses rshift_rnd_pos(); otherwise the negated
+*    value is handed to rshift_rnd_nonpos().
+************************************************************************
+*/
+void weighted_bi_prediction16x16_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+  int x, y;
+
+  if (weight_denom <= 0)
+  {
+    const int shift = -weight_denom;
+
+    for (y = 0; y < 16; ++y)
+    {
+      imgpel *dst = mb_pred[y];          /* list-1 samples in, result out */
+      const imgpel *src_l0 = block_l0[y];
+
+      for (x = 0; x < 16; ++x)
+      {
+        const int weighted_sum = wp_scale_l0 * src_l0[x] + wp_scale_l1 * dst[x];
+        dst[x] = (imgpel) iClip1(255, (rshift_rnd_nonpos(weighted_sum, shift) + wp_offset));
+      }
+    }
+    return;
+  }
+
+  for (y = 0; y < 16; ++y)
+  {
+    imgpel *dst = mb_pred[y];            /* list-1 samples in, result out */
+    const imgpel *src_l0 = block_l0[y];
+
+    for (x = 0; x < 16; ++x)
+    {
+      const int weighted_sum = wp_scale_l0 * src_l0[x] + wp_scale_l1 * dst[x];
+      dst[x] = (imgpel) iClip1(255, (rshift_rnd_pos(weighted_sum, weight_denom) + wp_offset));
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+* 16x8: SSE2 weighted bi-prediction for 8 rows of 16 samples, in place.
+* Each row of mb_pred is combined with the matching row of block_l0
+* as (wp_scale_l0 * block_l0 + wp_scale_l1 * mb_pred), shifted by
+* weight_denom, then wp_offset is added and the result is stored back
+* into mb_pred with unsigned 8-bit saturation.
+*
+* weight_denom > 0 : add (1 << (weight_denom-1)), arithmetic shift right.
+* otherwise : shift left by -weight_denom, no rounding term.
+*
+* Rows are accessed with _mm_load_si128 / _mm_store_si128, i.e. the
+* aligned-access intrinsics.
+*
+* NOTE(review): all products and sums stay in 16-bit lanes
+* (_mm_mullo_epi16 / _mm_add_epi16); confirm the weights passed by the
+* callers cannot overflow them.
+************************************************************************
+*/
+void weighted_bi_prediction16x8_sse2(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+ int jj;
+
+ __m128i xmm_zero = _mm_setzero_si128();
+ __m128i xmm_scale_l0 = _mm_set1_epi16(wp_scale_l0);
+ __m128i xmm_scale_l1 = _mm_set1_epi16(wp_scale_l1);
+ __m128i xmm_offset = _mm_set1_epi16(wp_offset);
+ if (weight_denom > 0)
+ {
+ __m128i xmm_shift = _mm_cvtsi32_si128(weight_denom);
+ __m128i xmm_add = _mm_set1_epi16((1<<(weight_denom-1))); // rounding term added before the right shift
+
+ for(jj = 0; jj < 8; jj++)
+ {
+ __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]);
+ __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]);
+
+ __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); // widen 8-bit samples to 16-bit lanes
+ __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero);
+ __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero);
+ __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero);
+ b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0);
+ b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1);
+ b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0);
+ b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1);
+ b0_low = _mm_add_epi16(b0_low, b1_low);
+ b0_high = _mm_add_epi16(b0_high, b1_high);
+ b0_low = _mm_add_epi16(b0_low, xmm_add);
+ b0_high = _mm_add_epi16(b0_high, xmm_add);
+ b0_low = _mm_sra_epi16(b0_low, xmm_shift);
+ b0_high = _mm_sra_epi16(b0_high, xmm_shift);
+ b0_low = _mm_add_epi16(b0_low, xmm_offset);
+ b0_high = _mm_add_epi16(b0_high, xmm_offset);
+
+ b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 (unsigned saturation)
+ _mm_store_si128((__m128i *)mb_pred[jj], b0_low);
+ // scalar form of the shift step: (x + (1 << (a-1) )) >> a;
+ // scalar form of the whole row: row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_pos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset));
+ }
+ }
+ else
+ {
+
+ __m128i xmm_shift = _mm_cvtsi32_si128(-weight_denom); // left-shift count for the non-positive case
+ for(jj = 0; jj < 8; jj++)
+ {
+ __m128i b0 = _mm_load_si128((__m128i *)block_l0[jj]);
+ __m128i b1 = _mm_load_si128((__m128i *)mb_pred[jj]);
+
+ __m128i b0_low = _mm_unpacklo_epi8(b0, xmm_zero); // widen 8-bit samples to 16-bit lanes
+ __m128i b0_high = _mm_unpackhi_epi8(b0, xmm_zero);
+ __m128i b1_low = _mm_unpacklo_epi8(b1, xmm_zero);
+ __m128i b1_high = _mm_unpackhi_epi8(b1, xmm_zero);
+ b0_low = _mm_mullo_epi16(b0_low, xmm_scale_l0);
+ b1_low = _mm_mullo_epi16(b1_low, xmm_scale_l1);
+ b0_high = _mm_mullo_epi16(b0_high, xmm_scale_l0);
+ b1_high = _mm_mullo_epi16(b1_high, xmm_scale_l1);
+ b0_low = _mm_add_epi16(b0_low, b1_low);
+ b0_high = _mm_add_epi16(b0_high, b1_high);
+ b0_low = _mm_sll_epi16(b0_low, xmm_shift);
+ b0_high = _mm_sll_epi16(b0_high, xmm_shift);
+ b0_low = _mm_add_epi16(b0_low, xmm_offset);
+ b0_high = _mm_add_epi16(b0_high, xmm_offset);
+
+ b0_low = _mm_packus_epi16(b0_low, b0_high); // convert back to epi8 (unsigned saturation)
+ _mm_store_si128((__m128i *)mb_pred[jj], b0_low);
+
+ // scalar form of the shift step: (x << a);
+ // scalar form of the whole row: row[ii] = (imgpel) iClip1(color_clip, (rshift_rnd_nonpos((wp_scale_l0 * b0[ii] + wp_scale_l1 * b1[ii]), weight_denom) + wp_offset));
+ }
+ }
+}
+/*!
+************************************************************************
+* \brief
+* 16-wide, 8-high weighted bi-prediction delegated to
+* ippiWeightedAverage_H264_8u_C1IR (only built when H264_IPP is defined).
+* block_l0 is passed as the first source, mb_pred as the in-place
+* second source / destination, and sizeof(mb_pred[0]) (the byte size of
+* one row of the macroblock buffer) as the step.
+************************************************************************
+*/
+#ifdef H264_IPP
+void weighted_bi_prediction16x8_ipp(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+ IppiSize roi = {16, 8};
+ ippiWeightedAverage_H264_8u_C1IR(block_l0[0], mb_pred[0], sizeof(mb_pred[0]), wp_scale_l0, wp_scale_l1, weight_denom, wp_offset, roi);
+}
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Plain C weighted bi-prediction for a block 16 samples wide and
+*    8 rows high, computed in place.
+*    Each sample of mb_pred is replaced by the clipped value of
+*    shift(wp_scale_l0 * block_l0 + wp_scale_l1 * mb_pred) + wp_offset.
+*    A positive weight_denom uses rshift_rnd_pos(); otherwise the negated
+*    value is handed to rshift_rnd_nonpos().
+************************************************************************
+*/
+void weighted_bi_prediction16x8_c(h264_imgpel_macroblock_row_t *mb_pred, const h264_imgpel_macroblock_t block_l0, int wp_scale_l0, int wp_scale_l1, int wp_offset, int weight_denom)
+{
+  int x, y;
+
+  if (weight_denom <= 0)
+  {
+    const int shift = -weight_denom;
+
+    for (y = 0; y < 8; ++y)
+    {
+      imgpel *dst = mb_pred[y];          /* list-1 samples in, result out */
+      const imgpel *src_l0 = block_l0[y];
+
+      for (x = 0; x < 16; ++x)
+      {
+        const int weighted_sum = wp_scale_l0 * src_l0[x] + wp_scale_l1 * dst[x];
+        dst[x] = (imgpel) iClip1(255, (rshift_rnd_nonpos(weighted_sum, shift) + wp_offset));
+      }
+    }
+    return;
+  }
+
+  for (y = 0; y < 8; ++y)
+  {
+    imgpel *dst = mb_pred[y];            /* list-1 samples in, result out */
+    const imgpel *src_l0 = block_l0[y];
+
+    for (x = 0; x < 16; ++x)
+    {
+      const int weighted_sum = wp_scale_l0 * src_l0[x] + wp_scale_l1 * dst[x];
+      dst[x] = (imgpel) iClip1(255, (rshift_rnd_pos(weighted_sum, weight_denom) + wp_offset));
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Motion compensation stand-in used when there is no reference
+*    picture: fills the top-left hor_block_size x ver_block_size area
+*    of block with med_imgpel_value. Debug builds also print a notice.
+************************************************************************
+*/
+static void get_data_no_ref(h264_imgpel_macroblock_row_t *block, int ver_block_size, int hor_block_size, imgpel med_imgpel_value)
+{
+  int row, col;
+#ifdef _DEBUG
+  printf("list[ref_frame] is equal to 'no reference picture' before RAP\n");
+#endif
+
+  for (row = 0; row < ver_block_size; ++row)
+  {
+    imgpel *dst = block[row];
+
+    for (col = 0; col < hor_block_size; ++col)
+      dst[col] = med_imgpel_value;
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Fetch one motion-compensated block (1/4 sub-pixel interpolation)
+*    for colour plane pl into block.
+*
+*    If curr_ref is the "no reference picture" placeholder and the
+*    current frame POC is below the recovery POC, the block is filled
+*    with the plane's DC prediction value instead. Otherwise the source
+*    plane is selected (luma, or a chroma plane for independent-plane
+*    coding / pl != PLANE_Y) and ippiInterpolateLumaBlock_H264_8u_P1R
+*    performs the interpolation. Block position is passed as x_pos << 2,
+*    y_pos << 2.
+************************************************************************
+*/
+void get_block_luma(Macroblock *currMB, ColorPlane pl, StorablePicture *curr_ref, int x_pos, int y_pos, const short *motion_vector, int hor_block_size, int ver_block_size, h264_imgpel_macroblock_row_t *block)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  IppVCInterpolateBlock_8u interp;
+  StorablePicture *cur_pic;
+  VideoImage *ref_plane;
+
+  if (curr_ref == p_Vid->no_reference_picture && p_Vid->framepoc < p_Vid->recovery_poc)
+  {
+    get_data_no_ref(block, ver_block_size, hor_block_size, (imgpel) p_Vid->dc_pred_value_comp[pl]);
+    return;
+  }
+
+  cur_pic = p_Vid->dec_picture;
+
+  /* choose the reference plane to interpolate from */
+  ref_plane = curr_ref->imgY;
+  if (IS_INDEPENDENT(p_Vid))
+  {
+    if (p_Vid->colour_plane_id == 1)
+      ref_plane = curr_ref->imgUV[0];
+    else if (p_Vid->colour_plane_id == 2)
+      ref_plane = curr_ref->imgUV[1];
+  }
+  else if (pl != PLANE_Y)
+  {
+    ref_plane = curr_ref->imgUV[pl-1];
+  }
+
+  /* source and destination buffers */
+  interp.pSrc[0] = ref_plane->base_address;
+  interp.srcStep = ref_plane->stride;
+  interp.pDst[0] = block[0];
+  interp.dstStep = sizeof(block[0]);
+
+  /* frame geometry: half the picture height for field macroblocks */
+  interp.sizeFrame.width = cur_pic->size_x;
+  if (cur_pic->motion.mb_field[currMB->mbAddrX])
+    interp.sizeFrame.height = cur_pic->size_y >> 1;
+  else
+    interp.sizeFrame.height = cur_pic->size_y;
+
+  /* block geometry and motion vector */
+  interp.sizeBlock.width = hor_block_size;
+  interp.sizeBlock.height = ver_block_size;
+  interp.pointBlockPos.x = x_pos << 2;
+  interp.pointBlockPos.y = y_pos << 2;
+  interp.pointVector.x = motion_vector[0];
+  interp.pointVector.y = motion_vector[1];
+
+  ippiInterpolateLumaBlock_H264_8u_P1R(&interp);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Chroma (0,0): straight copy of a hor_block_size x ver_block_size
+*    area, starting at (x_pos, y_pos) in image, into the top-left
+*    corner of block. Source rows are image->stride samples apart.
+*
+*    Every width (16, 8, 4, 2 and the generic fallback) performs the
+*    same row copy, so a single loop covers all of them.
+************************************************************************
+*/
+static void get_chroma_00(h264_imgpel_macroblock_t block, const VideoImage *image, int ver_block_size, int hor_block_size, int x_pos, int y_pos)
+{
+  const ptrdiff_t stride = image->stride;
+  const imgpel *src_row = image->base_address + stride * y_pos + x_pos;
+  int row, col;
+
+  for (row = 0; row < ver_block_size; ++row)
+  {
+    imgpel *dst_row = block[row];
+
+    for (col = 0; col < hor_block_size; ++col)
+      dst_row[col] = src_row[col];
+
+    src_row += stride;
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Chroma motion compensation for both chroma planes of one block.
+*    The result is written at row joff, column ioff of block0 (first
+*    chroma plane) and block1 (second chroma plane).
+*
+*    If curr_ref is the "no reference picture" placeholder and the
+*    current frame POC is below the recovery POC, the destination area
+*    is filled with the planes' DC prediction values. The fill honours
+*    ioff/joff, so it lands on the same samples the interpolator would
+*    have written (previously it always started at row 0, column 0).
+*    Otherwise ippiInterpolateChromaBlock_H264_8u_P2R does the work.
+*
+* \param x_pos, y_pos  block position; shifted left by one when handed
+*                      to IPP, except x for YUV444 and y for anything
+*                      other than YUV420
+* \param motion_vector motion vector (x, y); shifted left by one in
+*                      exactly the cases where the position is not
+************************************************************************
+*/
+static void get_block_chroma(Macroblock *currMB, StorablePicture *curr_ref, int x_pos, int y_pos, const short *motion_vector, int hor_block_size, int ver_block_size, h264_imgpel_macroblock_row_t *block0, h264_imgpel_macroblock_row_t *block1, int ioff, int joff)
+{
+  VideoParameters *p_Vid = currMB->p_Vid;
+  if (curr_ref == p_Vid->no_reference_picture && p_Vid->framepoc < p_Vid->recovery_poc)
+  {
+    /* address the sub-block at (joff, ioff), as the IPP path does via pDst */
+    get_data_no_ref((h264_imgpel_macroblock_row_t *)&block0[joff][ioff], ver_block_size, hor_block_size, (imgpel) p_Vid->dc_pred_value_comp[1]);
+    get_data_no_ref((h264_imgpel_macroblock_row_t *)&block1[joff][ioff], ver_block_size, hor_block_size, (imgpel) p_Vid->dc_pred_value_comp[2]);
+  }
+  else
+  {
+    StorablePicture *dec_picture = p_Vid->dec_picture;
+    IppVCInterpolateBlock_8u block_data;
+
+    block_data.pSrc[0] = curr_ref->imgUV[0]->base_address;
+    block_data.pSrc[1] = curr_ref->imgUV[1]->base_address;
+    block_data.srcStep = curr_ref->imgUV[0]->stride;   /* one step is used for both source planes */
+    block_data.pDst[0] = &block0[joff][ioff];
+    block_data.pDst[1] = &block1[joff][ioff];
+    block_data.dstStep = sizeof(block0[0]);
+    block_data.sizeFrame.width = dec_picture->size_x_cr;
+    /* field macroblocks address a picture of half the height */
+    block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y_cr >> 1): dec_picture->size_y_cr;
+    block_data.sizeBlock.width = hor_block_size;
+    block_data.sizeBlock.height = ver_block_size;
+    if (dec_picture->chroma_format_idc == YUV444)
+    {
+      block_data.pointBlockPos.x = x_pos;
+      block_data.pointVector.x = motion_vector[0] << 1;
+    }
+    else
+    {
+      block_data.pointBlockPos.x = x_pos<<1;
+      block_data.pointVector.x = motion_vector[0];
+    }
+    if (dec_picture->chroma_format_idc == YUV420)
+    {
+      block_data.pointVector.y = motion_vector[1];
+      block_data.pointBlockPos.y = y_pos<<1;
+    }
+    else
+    {
+      block_data.pointBlockPos.y = y_pos;
+      block_data.pointVector.y = motion_vector[1] << 1;
+    }
+
+    ippiInterpolateChromaBlock_H264_8u_P2R(&block_data);
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+* Reconstruct both chroma planes of an intra macroblock.
+* For each plane (uv = 0, 1) the chroma intra prediction is generated
+* with intrapred_chroma(), then one of three paths is taken:
+* - not SI4MB and (cbp >> 4) non-zero: inverse-transform the 4x4
+* blocks on top of the prediction and copy mb_rec to the picture;
+* - SI4MB: run itrans_sp_cr(), then transform and copy four 4x4 blocks;
+* - otherwise: copy the prediction (mb_pred) straight to the picture.
+* yuv == YUV420-1 uses fixed 8x8 handling; other values walk the
+* subblk_offset_x / subblk_offset_y tables.
+************************************************************************
+*/
+void intra_cr_decoding(Macroblock *currMB, int yuv)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ Slice *currSlice = currMB->p_Slice;
+ StorablePicture *dec_picture = p_Vid->dec_picture;
+ int uv;
+ int b8,b4;
+ int ioff, joff;
+ // TODO: fix 4x4 lossless
+
+ for(uv = 0; uv < 2; uv++)
+ {
+ int pl = uv + 1; // plane index into cof4 / mb_pred / mb_rec (0 is not used here)
+ const h264_short_block_t *blocks = currSlice->cof4[pl];
+ const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];
+ h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+
+ intrapred_chroma(currMB, uv);
+
+ if ((!(currMB->mb_type == SI4MB) && (currMB->cbp >> 4)) ) // bits above the low four of cbp are set
+ {
+ if (yuv == YUV420-1)
+ {
+ opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
+ opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
+ opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
+ opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
+ copy_image_data_8x8_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, mb_rec);
+ }
+ else
+ {
+ for (b8 = 0; b8 < (p_Vid->num_uv_blocks); b8++)
+ {
+ for(b4 = 0; b4 < 4; b4++)
+ {
+ joff = subblk_offset_y[yuv][b8][b4];
+ ioff = subblk_offset_x[yuv][b8][b4];
+
+ opt_itrans4x4(blocks[cof4_pos_to_subblock[joff>>2][ioff>>2]], mb_pred, mb_rec, ioff, joff);
+
+ copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + ioff, currMB->pix_c_y + joff, mb_rec, ioff, joff);
+ }
+ }
+ }
+ }
+ else if (currMB->mb_type == SI4MB)
+ {
+ itrans_sp_cr(currMB, uv);
+
+ opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
+ copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 0, currMB->pix_c_y + 0, mb_rec, 0, 0);
+ opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
+ copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 4, currMB->pix_c_y + 0, mb_rec, 4, 0);
+ opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
+ copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 0, currMB->pix_c_y + 4, mb_rec, 0, 4);
+ opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
+ copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + 4, currMB->pix_c_y + 4, mb_rec, 4, 4);
+ }
+ else
+ {
+ // no inverse transform on this path: the prediction itself is the output
+ if (yuv == YUV420-1)
+ {
+ copy_image_data_8x8_stride(dec_picture->imgUV[uv], currMB->pix_c_x, currMB->pix_c_y, mb_pred);
+ }
+ else
+ {
+ for (b8 = 0; b8 < (p_Vid->num_uv_blocks); b8++)
+ {
+ for(b4 = 0; b4 < 4; b4++)
+ {
+ joff = subblk_offset_y[yuv][b8][b4];
+ ioff = subblk_offset_x[yuv][b8][b4];
+
+ copy_image_data_4x4_stride(dec_picture->imgUV[uv], currMB->pix_c_x + ioff, currMB->pix_c_y + joff, mb_pred, ioff, joff);
+ }
+ }
+ }
+ }
+ }
+}
+/*!
+************************************************************************
+* \brief
+* Derive the list-0 and list-1 reference indices and motion vector
+* predictors for the current macroblock from three neighbours.
+*
+* get_neighbors0016() fills mb[]; entries 0, 1 and 2 feed the locals
+* suffixed L, U and UR. An unavailable neighbour contributes -1.
+* When mb_aff_frame_flag is set the neighbour's ref_idx is rescaled:
+* - current MB is a field MB: doubled, unless the neighbour is a
+* field MB or its ref_idx is negative;
+* - current MB is a frame MB: shifted right by one when the neighbour
+* is a field MB or its ref_idx is negative, unchanged otherwise.
+*
+* The three candidates are then reduced pairwise: the minimum when both
+* values are >= 0, the maximum otherwise.
+*
+* \param pmvl0, pmvl1 written through currMB->GetMVPredictor only when
+* the matching reference index is >= 0; otherwise
+* left untouched
+* \param l0_rFrame, l1_rFrame receive the selected reference indices
+************************************************************************
+*/
+void prepare_direct_params(Macroblock *currMB, StorablePicture *dec_picture, short pmvl0[2], short pmvl1[2],char *l0_rFrame, char *l1_rFrame)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ Slice *currSlice = currMB->p_Slice;
+ char l0_rFrameL, l0_rFrameU, l0_rFrameUR;
+ char l1_rFrameL, l1_rFrameU, l1_rFrameUR;
+ PicMotionParams *motion = &dec_picture->motion;
+
+ PixelPos mb[4];
+
+ get_neighbors0016(currMB, mb);
+
+ if (!currSlice->mb_aff_frame_flag)
+ {
+ l0_rFrameL = (char) (mb[0].available ? motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx : -1);
+ l0_rFrameU = (char) (mb[1].available ? motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx : -1);
+ l0_rFrameUR = (char) (mb[2].available ? motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx : -1);
+
+ l1_rFrameL = (char) (mb[0].available ? motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx : -1);
+ l1_rFrameU = (char) (mb[1].available ? motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx : -1);
+ l1_rFrameUR = (char) (mb[2].available ? motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx : -1);
+ }
+ else
+ {
+ if (currMB->mb_field)
+ {
+ // current MB is a field MB: double the ref_idx of frame neighbours
+ l0_rFrameL = (char) (mb[0].available
+ ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx < 0
+ ? motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx
+ : motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx * 2: -1);
+
+ l0_rFrameU = (char) (mb[1].available
+ ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx < 0
+ ? motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx
+ : motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx * 2: -1);
+
+ l0_rFrameUR = (char) (mb[2].available
+ ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx < 0
+ ? motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx
+ : motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx * 2: -1);
+
+ l1_rFrameL = (char) (mb[0].available
+ ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx < 0
+ ? motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx
+ : motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx * 2: -1);
+
+ l1_rFrameU = (char) (mb[1].available
+ ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx < 0
+ ? motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx
+ : motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx * 2: -1);
+
+ l1_rFrameUR = (char) (mb[2].available
+ ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx < 0
+ ? motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx
+ : motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx * 2: -1);
+ }
+ else
+ {
+ // current MB is a frame MB: halve the ref_idx of field neighbours
+ l0_rFrameL = (char) (mb[0].available
+ ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx < 0
+ ? motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx >> 1
+ : motion->motion[LIST_0][mb[0].pos_y][mb[0].pos_x].ref_idx: -1);
+
+ l0_rFrameU = (char) (mb[1].available
+ ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx < 0
+ ? motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx >> 1
+ : motion->motion[LIST_0][mb[1].pos_y][mb[1].pos_x].ref_idx : -1);
+
+ l0_rFrameUR = (char) (mb[2].available
+ ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx < 0
+ ? motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx >> 1
+ : motion->motion[LIST_0][mb[2].pos_y][mb[2].pos_x].ref_idx : -1);
+
+ l1_rFrameL = (char) (mb[0].available
+ ? p_Vid->mb_data[mb[0].mb_addr].mb_field || motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx < 0
+ ? motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx >> 1
+ : motion->motion[LIST_1][mb[0].pos_y][mb[0].pos_x].ref_idx : -1);
+
+ l1_rFrameU = (char) (mb[1].available
+ ? p_Vid->mb_data[mb[1].mb_addr].mb_field || motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx < 0
+ ? motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx >> 1
+ : motion->motion[LIST_1][mb[1].pos_y][mb[1].pos_x].ref_idx : -1);
+
+ l1_rFrameUR = (char) (mb[2].available
+ ? p_Vid->mb_data[mb[2].mb_addr].mb_field || motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx < 0
+ ? motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx >> 1
+ : motion->motion[LIST_1][mb[2].pos_y][mb[2].pos_x].ref_idx : -1);
+ }
+ }
+
+ // pairwise reduction: min when both candidates are >= 0, max otherwise
+ *l0_rFrame = (char) ((l0_rFrameL >= 0 && l0_rFrameU >= 0) ? imin(l0_rFrameL,l0_rFrameU) : imax(l0_rFrameL,l0_rFrameU));
+ *l0_rFrame = (char) ((*l0_rFrame >= 0 && l0_rFrameUR >= 0) ? imin(*l0_rFrame,l0_rFrameUR): imax(*l0_rFrame,l0_rFrameUR));
+
+ *l1_rFrame = (char) ((l1_rFrameL >= 0 && l1_rFrameU >= 0) ? imin(l1_rFrameL,l1_rFrameU) : imax(l1_rFrameL,l1_rFrameU));
+ *l1_rFrame = (char) ((*l1_rFrame >= 0 && l1_rFrameUR >= 0) ? imin(*l1_rFrame,l1_rFrameUR): imax(*l1_rFrame,l1_rFrameUR));
+
+ if (*l0_rFrame >=0)
+ currMB->GetMVPredictor (currMB, mb, pmvl0, *l0_rFrame, motion->motion[LIST_0], 0, 0, 16, 16);
+
+ if (*l1_rFrame >=0)
+ currMB->GetMVPredictor (currMB, mb, pmvl1, *l1_rFrame, motion->motion[LIST_1], 0, 0, 16, 16);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Debug-only sanity check of a motion vector. Prints a warning to
+*    stderr when mv_x lies outside [-8192, 8191] or mv_y lies outside
+*    [-max_mb_vmv_r, max_mb_vmv_r - 1]. Decoding is not interrupted
+*    (the error() calls are disabled). Without _DEBUG the function
+*    does nothing.
+************************************************************************
+*/
+static void check_motion_vector_range(VideoParameters *p_Vid, short mv_x, short mv_y)
+{
+#ifdef _DEBUG
+  if (mv_x < -8192 || mv_x > 8191)
+  {
+    fprintf(stderr,"WARNING! Horizontal motion vector %d is out of allowed range {-8192, 8191} in picture %d, macroblock %d\n", mv_x, p_Vid->number, p_Vid->current_mb_nr);
+    //error("invalid stream: too big horizontal motion vector", 500);
+  }
+
+  if (mv_y < (-p_Vid->max_mb_vmv_r) || mv_y > (p_Vid->max_mb_vmv_r - 1))
+  {
+    fprintf(stderr,"WARNING! Vertical motion vector %d is out of allowed range {%d, %d} in picture %d, macroblock %d\n", mv_y, (-p_Vid->max_mb_vmv_r), (p_Vid->max_mb_vmv_r - 1), p_Vid->number, p_Vid->current_mb_nr);
+    //error("invalid stream: too big vertical motion vector", 500);
+  }
+#endif
+}
+/*!
+************************************************************************
+* \brief
+* Motion compensation for one block of the current macroblock, written
+* into currSlice->mb_pred.
+*
+* \param pl colour plane whose mb_pred buffer receives the luma-path result
+* \param pred_dir 0 or 1: predict from that single list; 2: bi-prediction
+* \param i, j block offset inside the macroblock in 4-sample units
+* (the sample offsets are i << 2 and j << 2)
+* \param list_offset added to the list index when selecting p_Vid->listX
+* \param block_size_x, block_size_y block dimensions in samples
+* \param curr_mb_field non-zero halves the reference index used for the
+* weight tables when explicit weighting is active
+*
+* Single list: the block is fetched with get_block_luma() and, if
+* currSlice->apply_weights is set, scaled by weighted_mc_prediction().
+* Bi-prediction: the list-0 block goes to a temporary, the list-1 block
+* to mb_pred, and the two are merged by weighted_bi_prediction() or
+* bi_prediction().
+* For chroma formats other than YUV400 and YUV444 the same is repeated
+* for both chroma planes through get_block_chroma(), with the block
+* width halved and the height halved unless mb_cr_size_y equals
+* MB_BLOCK_SIZE.
+*
+* NOTE(review): mv_mul, vec1_y_cr and vec2_y_cr are never read below.
+************************************************************************
+*/
+void perform_mc(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int block_size_x, int block_size_y, int curr_mb_field)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ Slice *currSlice = currMB->p_Slice;
+
+ static const int mv_mul = 16; // 4 * 4
+
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int ioff = (i << 2);
+ int joff = (j << 2);
+
+ assert (pred_dir<=2);
+
+ if (pred_dir != 2)
+ {
+ //===== Single List Prediction =====
+ short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
+ short ref_idx_wp = ref_idx; // index into the weight tables; may be halved below
+ short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
+ StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];
+
+ check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);
+
+
+ get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0, wp_offset;
+ if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))))
+ {
+ ref_idx_wp >>=1;
+ }
+
+ alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
+ wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];
+
+ weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], block_size_y, block_size_x, alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
+ }
+
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) )
+ { // YUV420 or YUV422
+ int uv;
+
+ int ioff_cr = ioff >> 1;
+ int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1;
+ int block_size_x_cr = block_size_x >> 1;
+ int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1;
+
+ short mv_cr[2] = {mv_array[0], mv_array[1] + + ((active_sps->chroma_format_idc == YUV420)? list->chroma_vector_adjustment : 0) }; // second '+' is a unary plus
+ get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+ for(uv=0;uv<2;uv++)
+ {
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
+ int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];
+
+ weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, block_size_x_cr, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
+ }
+ }
+ }
+ }
+ else
+ {
+ //===== BI-PREDICTION =====
+ __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; // list-0 scratch: [0] luma and first chroma plane, [1] second chroma plane
+ short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
+ short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;
+
+ short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
+ short l0_ref_idx = l0_refframe;
+ short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
+ short l1_ref_idx = l1_refframe;
+
+ check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
+ check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);
+
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, l0_mv_array, block_size_x, block_size_y, tmp_block_l0[0]);
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, l1_mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+
+ if(currSlice->apply_weights)
+ {
+ int alpha_l0, alpha_l1, wp_offset;
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+
+ // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
+ if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field)
+ {
+ l0_ref_idx >>=1;
+ l1_ref_idx >>=1;
+ }
+
+ alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1);
+
+ weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x, alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x);
+ }
+
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) )
+ { // YUV420 or YUV422
+ int uv;
+
+ int ioff_cr = ioff >> 1;
+ int joff_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? joff : joff >> 1;
+ int block_size_x_cr = block_size_x >> 1;
+ int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1;
+
+ int vec1_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0); // NOTE(review): not read afterwards
+ int vec2_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0); // NOTE(review): not read afterwards
+ short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) };
+ short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) };
+
+ get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, mv_cr1, block_size_x_cr, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
+ get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, mv_cr2, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+ for(uv=0;uv<2;uv++)
+ {
+ if(currSlice->apply_weights)
+ {
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+
+ int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1);
+
+ weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr);
+ }
+ }
+ }
+ }
+}
+
+
+/**
+ * perform_mc8x16 - build the inter prediction for one 8-wide, 16-tall partition.
+ *
+ * Reads the motion vector(s) and reference index(es) stored in
+ * dec_picture->motion at block position (currMB->block_x + i,
+ * currMB->block_y + j), fetches the referenced samples and writes the
+ * prediction into currSlice->mb_pred (plane pl, plus planes 1 and 2 when
+ * chroma is predicted).
+ *
+ * currMB         macroblock being predicted; supplies p_Vid, p_Slice and block coordinates
+ * pl             colour plane passed to get_block_luma and used as the mb_pred index
+ * dec_picture    picture being decoded; motion data and chroma_format_idc are read from it
+ * pred_dir       0 or 1 selects a single motion list, 2 selects bi-prediction
+ * i, j           block offsets added to block_x / block_y; shifted left by 2 to index mb_pred
+ * list_offset    added to the list number when indexing p_Vid->listX
+ * curr_mb_field  when non-zero, reference indices are halved for the weight-table
+ *                lookup under the explicit-weighting conditions tested below
+ */
+void perform_mc8x16(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+ const int block_size_x=8;
+ const int block_size_y=16;
+ Slice *currSlice = currMB->p_Slice;
+
+ static const int mv_mul = 16; // 4 * 4 -- NOTE(review): never referenced in this function
+ // i4/j4 index the motion field; ioff/joff (offset * 4) index mb_pred.
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int ioff = (i << 2);
+ int joff = (j << 2);
+ // pred_dir: 0/1 = single list, 2 = bi-prediction.
+ assert (pred_dir<=2);
+
+ if (pred_dir != 2)
+ {
+ //===== Single List Prediction =====
+ short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
+ short ref_idx_wp = ref_idx; // copy used only for weight-table lookups; may be halved below
+ short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
+ StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];
+
+ check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);
+ // Fetch the block straight into mb_pred[pl] at (joff, ioff).
+ get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+ // Weighting is handed only the mb_pred block, so it is presumably applied in place.
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0, wp_offset;
+ if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))))
+ {
+ ref_idx_wp >>=1; // weight tables are indexed with the halved reference index in this case
+ }
+ // Component 0 of the weight/offset tables, with the luma denominator.
+ alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
+ wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];
+
+ weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], block_size_y, block_size_x, alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
+ }
+ // Chroma is predicted here only for formats other than YUV400 and YUV444.
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) )
+ { // YUV420 or YUV422
+ int uv;
+ // Horizontal offset/size are always halved; vertical ones only when mb_cr_size_y != MB_BLOCK_SIZE.
+ int ioff_cr = ioff >> 1;
+ int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1;
+ int block_size_x_cr = block_size_x >> 1;
+ int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1;
+ // For YUV420 the vertical vector gets the reference picture's chroma_vector_adjustment added.
+ short mv_cr[2] = {mv_array[0], mv_array[1] + + ((active_sps->chroma_format_idc == YUV420)? list->chroma_vector_adjustment : 0) };
+ get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+ for(uv=0;uv<2;uv++) // uv+1 selects mb_pred[1] / mb_pred[2] and weight component 1 / 2
+ {
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
+ int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];
+
+ weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, block_size_x_cr, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
+ }
+ }
+ }
+ }
+ else
+ {
+ //===== BI-PREDICTION =====
+ __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; // scratch: [0] holds list-0 luma, later [0]/[1] hold list-0 chroma
+ short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
+ short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;
+ // *_refframe selects the reference picture; *_ref_idx is a copy that may be halved for weight lookups.
+ short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
+ short l0_ref_idx = l0_refframe;
+ short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
+ short l1_ref_idx = l1_refframe;
+
+ check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
+ check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);
+ // List-0 prediction goes to the scratch block, list-1 prediction directly into mb_pred[pl].
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, l0_mv_array, block_size_x, block_size_y, tmp_block_l0[0]);
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, l1_mv_array, block_size_x, block_size_y, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+ // Combine the two predictions into mb_pred[pl], weighted or unweighted.
+ if(currSlice->apply_weights)
+ {
+ int alpha_l0, alpha_l1, wp_offset;
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; // list_offset applies to the weight tables only when weighted_bipred_idc is 2
+
+ // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
+ if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field)
+ {
+ l0_ref_idx >>=1;
+ l1_ref_idx >>=1;
+ }
+ // Offset is the rounded average of the two per-list offsets; denominator is luma_log2_weight_denom + 1.
+ alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1);
+
+ weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x, alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], block_size_y, block_size_x);
+ }
+ // Chroma: same scheme as luma, reusing the scratch blocks now that luma has been combined.
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) )
+ { // YUV420 or YUV422
+ int uv;
+
+ int ioff_cr = ioff >> 1;
+ int joff_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? joff : joff >> 1;
+ int block_size_x_cr = block_size_x >> 1;
+ int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? block_size_y : block_size_y >> 1;
+ // NOTE(review): vec1_y_cr / vec2_y_cr are computed but never read below.
+ int vec1_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0);
+ int vec2_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0);
+ short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) };
+ short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) };
+ // NOTE(review): the tests above use the literal 1 where the single-list path uses YUV420 -- presumably the same value; confirm.
+ get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, mv_cr1, block_size_x_cr, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
+ get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, mv_cr2, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+ for(uv=0;uv<2;uv++) // combine scratch block uv with mb_pred[uv+1]
+ {
+ if(currSlice->apply_weights)
+ {
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+ // l0_ref_idx / l1_ref_idx are used as left by the luma weighting block above (possibly halved).
+ int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1);
+
+ weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr);
+ }
+ }
+ }
+ }
+}
+/**
+ * perform_mc16x8 - build the inter prediction for one 16-wide, 8-tall partition.
+ *
+ * Reads the motion vector(s) and reference index(es) stored in
+ * dec_picture->motion at block position (currMB->block_x + i,
+ * currMB->block_y + j), fetches the referenced samples and writes the
+ * prediction into currSlice->mb_pred. The block dimensions are the
+ * literals 16 and 8; the luma weighting uses the 16x8-specific
+ * opt_weighted_* helpers.
+ *
+ * currMB         macroblock being predicted; supplies p_Vid, p_Slice and block coordinates
+ * pl             colour plane passed to get_block_luma and used as the mb_pred index
+ * dec_picture    picture being decoded; motion data and chroma_format_idc are read from it
+ * pred_dir       0 or 1 selects a single motion list, 2 selects bi-prediction
+ * i, j           block offsets added to block_x / block_y; shifted left by 2 to index mb_pred
+ * list_offset    added to the list number when indexing p_Vid->listX
+ * curr_mb_field  when non-zero, reference indices are halved for the weight-table
+ *                lookup under the explicit-weighting conditions tested below
+ */
+void perform_mc16x8(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ Slice *currSlice = currMB->p_Slice;
+ // i4/j4 index the motion field; ioff/joff (offset * 4) index mb_pred.
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int ioff = (i << 2);
+ int joff = (j << 2);
+ // pred_dir: 0/1 = single list, 2 = bi-prediction.
+ assert (pred_dir<=2);
+
+ if (pred_dir != 2)
+ {
+ //===== Single List Prediction =====
+ short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
+ short ref_idx_wp = ref_idx; // copy used only for weight-table lookups; may be halved below
+ short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
+ StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];
+
+ check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);
+ // Fetch the 16x8 block straight into mb_pred[pl] at (joff, ioff).
+ get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, 16, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0, wp_offset;
+ if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))))
+ {
+ ref_idx_wp >>=1; // weight tables are indexed with the halved reference index in this case
+ }
+ // Component 0 of the weight/offset tables, with the luma denominator.
+ alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
+ wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];
+
+ opt_weighted_mc_prediction16x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
+ }
+ // Chroma is predicted here only for formats other than YUV400 and YUV444.
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) )
+ { // YUV420 or YUV422
+ int uv;
+ // Horizontal offset/size are always halved; vertical ones only when mb_cr_size_y != MB_BLOCK_SIZE.
+ int ioff_cr = ioff >> 1;
+ int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1;
+ int block_size_x_cr = 16 >> 1;
+ int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? 8 : 8 >> 1;
+ // For YUV420 the vertical vector gets the reference picture's chroma_vector_adjustment added.
+ short mv_cr[2] = {mv_array[0], mv_array[1] + + ((active_sps->chroma_format_idc == YUV420)? list->chroma_vector_adjustment : 0) };
+ get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+ for(uv=0;uv<2;uv++) // uv+1 selects mb_pred[1] / mb_pred[2] and weight component 1 / 2
+ {
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
+ int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];
+
+ weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, block_size_x_cr, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
+ }
+ }
+ }
+ }
+ else
+ {
+ //===== BI-PREDICTION =====
+ __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; // scratch: [0] holds list-0 luma, later [0]/[1] hold list-0 chroma
+ short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
+ short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;
+ // *_refframe selects the reference picture; *_ref_idx is a copy that may be halved for weight lookups.
+ short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
+ short l0_ref_idx = l0_refframe;
+ short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
+ short l1_ref_idx = l1_refframe;
+
+ check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
+ check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);
+ // List-0 prediction goes to the scratch block, list-1 prediction directly into mb_pred[pl].
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, l0_mv_array, 16, 8, tmp_block_l0[0]);
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, l1_mv_array, 16, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+ // Combine the two predictions into mb_pred[pl], weighted or unweighted.
+ if(currSlice->apply_weights)
+ {
+ int alpha_l0, alpha_l1, wp_offset;
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; // list_offset applies to the weight tables only when weighted_bipred_idc is 2
+
+ // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
+ if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field)
+ {
+ l0_ref_idx >>=1;
+ l1_ref_idx >>=1;
+ }
+ // Offset is the rounded average of the two per-list offsets; denominator is luma_log2_weight_denom + 1.
+ alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1);
+
+ opt_weighted_bi_prediction16x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], 8, 16); // sizes given as 8, 16 (get_block_luma above takes 16, 8) -- presumably height first; confirm
+ }
+ // Chroma: same scheme as luma, reusing the scratch blocks now that luma has been combined.
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) )
+ { // YUV420 or YUV422
+ int uv;
+
+ int ioff_cr = ioff >> 1;
+ int joff_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? joff : joff >> 1;
+ int block_size_x_cr = 16 >> 1;
+ int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? 8 : 8 >> 1;
+ // NOTE(review): vec1_y_cr / vec2_y_cr are computed but never read below.
+ int vec1_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0);
+ int vec2_y_cr = currMB->block_y_aff + j + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0);
+ short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) };
+ short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) };
+ // NOTE(review): the tests above use the literal 1 where the single-list path uses YUV420 -- presumably the same value; confirm.
+ get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, mv_cr1, block_size_x_cr, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
+ get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, mv_cr2, block_size_x_cr, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+ for(uv=0;uv<2;uv++) // combine scratch block uv with mb_pred[uv+1]
+ {
+ if(currSlice->apply_weights)
+ {
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+ // l0_ref_idx / l1_ref_idx are used as left by the luma weighting block above (possibly halved).
+ int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1);
+
+ weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], block_size_y_cr, block_size_x_cr);
+ }
+ }
+ }
+ }
+}
+
+/**
+ * perform_mc8x8_YUV420 - 8x8 inter prediction specialised for the YUV420 case.
+ *
+ * Unlike the generic routine it takes no colour-plane argument: luma is
+ * always fetched with PLANE_Y into mb_pred[0], and chroma is always a
+ * 4x4 block written to mb_pred[1] / mb_pred[2] at half the luma offsets,
+ * with the reference picture's chroma_vector_adjustment added to the
+ * vertical vector unconditionally.
+ *
+ * In the bi-predictive path the two reference blocks are fetched either
+ * through get_block_luma / get_block_chroma or through the
+ * ippiInterpolate*Block_H264_8u calls, depending on the framepoc /
+ * recovery_poc tests below.
+ *
+ * currMB         macroblock being predicted; supplies p_Vid, p_Slice and block coordinates
+ * dec_picture    picture being decoded; motion data and picture sizes are read from it
+ * pred_dir       0 or 1 selects a single motion list, 2 selects bi-prediction
+ * i, j           block offsets added to block_x / block_y; shifted left by 2 to index mb_pred
+ * list_offset    added to the list number when indexing p_Vid->listX
+ * curr_mb_field  when non-zero, reference indices are halved for the weight-table
+ *                lookup under the explicit-weighting conditions tested below
+ */
+static void __forceinline perform_mc8x8_YUV420(Macroblock *currMB, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ Slice *currSlice = currMB->p_Slice;
+ // i4/j4 index the motion field; ioff/joff (offset * 4) index mb_pred.
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int ioff = (i << 2);
+ int joff = (j << 2);
+ // pred_dir: 0/1 = single list, 2 = bi-prediction.
+ assert (pred_dir<=2);
+
+ if (pred_dir != 2)
+ {
+ //===== Single List Prediction =====
+ short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
+ short ref_idx_wp = ref_idx; // copy used only for weight-table lookups; may be halved below
+ short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
+ StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];
+
+ check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);
+ // Fetch the 8x8 luma block straight into mb_pred[0] at (joff, ioff).
+ get_block_luma(currMB, PLANE_Y, list, i4, currMB->block_y_aff + j, mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff]);
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0, wp_offset;
+ if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))))
+ {
+ ref_idx_wp >>=1; // weight tables are indexed with the halved reference index in this case
+ }
+ // Component 0 of the weight/offset tables, with the luma denominator.
+ alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
+ wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];
+
+ opt_weighted_mc_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
+ }
+ // Chroma: unconditional here, 4x4 at half the luma offsets.
+ {
+ int uv;
+
+ int ioff_cr = ioff >> 1;
+ int joff_cr = joff >> 1;
+
+ short mv_cr[2] = {mv_array[0], mv_array[1] + list->chroma_vector_adjustment };
+ get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, 4, 4, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+
+ for(uv=0;uv<2;uv++) // uv+1 selects mb_pred[1] / mb_pred[2] and weight component 1 / 2
+ {
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
+ int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];
+
+ weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], 4, 4, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
+ }
+ }
+ }
+ }
+ else
+ {
+ //===== BI-PREDICTION =====
+ __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; // scratch: [0] holds list-0 luma, later [0]/[1] hold list-0 chroma
+ short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
+ short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;
+
+ short l0_ref_idx = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
+ short l1_ref_idx = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
+ // Reference pictures are resolved now, before l0_ref_idx / l1_ref_idx can be halved for weighting.
+ StorablePicture *ref_image0 = p_Vid->listX[LIST_0 + list_offset][l0_ref_idx];
+ StorablePicture *ref_image1 = p_Vid->listX[LIST_1 + list_offset][l1_ref_idx];
+
+ check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
+ check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);
+ // Luma fetch: get_block_luma path, or the IPP interpolation path otherwise.
+ if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid))
+ {
+ get_block_luma(currMB, PLANE_Y, ref_image0, i4, currMB->block_y_aff + j, l0_mv_array, 8, 8, tmp_block_l0[0]);
+ get_block_luma(currMB, PLANE_Y, ref_image1, i4, currMB->block_y_aff + j, l1_mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff]);
+ }
+ else
+ {
+ IppVCInterpolateBlock_8u block_data;
+ // One descriptor is filled for list 0, then partially overwritten for list 1.
+ block_data.pSrc[0] = ref_image0->imgY->base_address;
+ block_data.srcStep = ref_image0->imgY->stride;
+ block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]);
+ block_data.dstStep = sizeof(tmp_block_l0[0][0]); // size of one first-level element of the scratch block (presumably one pixel row); also used for the mb_pred destination below
+ block_data.sizeFrame.width = dec_picture->size_x;
+ block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y; // half height when mb_field is set for this macroblock
+ block_data.sizeBlock.width = 8;
+ block_data.sizeBlock.height = 8;
+ block_data.pointBlockPos.x = i4 << 2;
+ block_data.pointBlockPos.y = (currMB->block_y_aff + j) << 2;
+ block_data.pointVector.x = l0_mv_array[0];
+ block_data.pointVector.y = l0_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); // list 0 -> tmp_block_l0[0]
+ block_data.pSrc[0] = ref_image1->imgY->base_address;
+ block_data.srcStep = ref_image1->imgY->stride;
+ block_data.pDst[0] = &currSlice->mb_pred[0][joff][ioff];
+ block_data.pointVector.x = l1_mv_array[0];
+ block_data.pointVector.y = l1_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); // list 1 -> mb_pred[0]; frame/block geometry and dstStep reused from the first call
+ }
+ // Combine the two luma predictions into mb_pred[0], weighted or unweighted.
+ if(currSlice->apply_weights)
+ {
+ int alpha_l0, alpha_l1, wp_offset;
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; // list_offset applies to the weight tables only when weighted_bipred_idc is 2
+
+ // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
+ if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field)
+ {
+ l0_ref_idx >>=1;
+ l1_ref_idx >>=1;
+ }
+ // Offset is the rounded average of the two per-list offsets; denominator is luma_log2_weight_denom + 1.
+ alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1);
+
+ opt_weighted_bi_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[0][joff][ioff], tmp_block_l0[0], 8, 8);
+ }
+ // Chroma: 4x4 per plane, reusing the scratch blocks now that luma has been combined.
+ {
+ int uv;
+
+ int ioff_cr = ioff >> 1;
+ int joff_cr = joff >> 1;
+
+ short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ref_image0->chroma_vector_adjustment};
+ short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ref_image1->chroma_vector_adjustment};
+ // NOTE(review): the luma test above also includes IS_INDEPENDENT(p_Vid); this one does not -- confirm that is intended.
+ if (p_Vid->framepoc < p_Vid->recovery_poc)
+ {
+ get_block_chroma(currMB, ref_image0, i4, currMB->block_y_aff + j, mv_cr1, 4, 4, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
+ get_block_chroma(currMB, ref_image1, i4, currMB->block_y_aff + j, mv_cr2, 4, 4, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+ }
+ else
+ {
+ IppVCInterpolateBlock_8u block_data; // both chroma planes are handled per call via pSrc[0..1] / pDst[0..1]
+ block_data.pSrc[0] = ref_image0->imgUV[0]->base_address;
+ block_data.pSrc[1] = ref_image0->imgUV[1]->base_address;
+ block_data.srcStep = ref_image0->imgUV[0]->stride;
+ block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]);
+ block_data.pDst[1] = (Ipp8u *)(tmp_block_l0[1]);
+ block_data.dstStep = sizeof(tmp_block_l0[0][0]);
+ block_data.sizeFrame.width = dec_picture->size_x_cr;
+ block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y_cr >> 1): dec_picture->size_y_cr;
+ block_data.sizeBlock.width = 4;
+ block_data.sizeBlock.height = 4;
+ block_data.pointBlockPos.x = i4<<1; // block position shifted by 1 here, versus 2 in the luma descriptor
+ block_data.pointVector.x = mv_cr1[0];
+ block_data.pointVector.y = mv_cr1[1];
+ block_data.pointBlockPos.y = (currMB->block_y_aff + j)<<1;
+ ippiInterpolateChromaBlock_H264_8u_P2R(&block_data); // list 0 -> tmp_block_l0[0] / [1]
+ block_data.pSrc[0] = ref_image1->imgUV[0]->base_address;
+ block_data.pSrc[1] = ref_image1->imgUV[1]->base_address;
+ block_data.srcStep = ref_image1->imgUV[0]->stride;
+ block_data.pDst[0] = &currSlice->mb_pred[1][joff_cr][ioff_cr];
+ block_data.pDst[1] = &currSlice->mb_pred[2][joff_cr][ioff_cr];
+ block_data.pointVector.x = mv_cr2[0];
+ block_data.pointVector.y = mv_cr2[1];
+ ippiInterpolateChromaBlock_H264_8u_P2R(&block_data); // list 1 -> mb_pred[1] / mb_pred[2]; geometry and dstStep reused
+ }
+
+ for(uv=0;uv<2;uv++) // combine scratch block uv with mb_pred[uv+1]
+ {
+ if(currSlice->apply_weights)
+ {
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+ // l0_ref_idx / l1_ref_idx are used as left by the luma weighting block above (possibly halved).
+ int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1);
+
+ weighted_bi_prediction4x4((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction4x4_mmx((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv]);
+ }
+ }
+ }
+ }
+}
+/**
+ * perform_mc8x8 - build the inter prediction for one 8x8 partition.
+ *
+ * When dec_picture->chroma_format_idc is YUV420 the work is delegated to
+ * perform_mc8x8_YUV420 (note that pl is not forwarded on that path).
+ * Otherwise the generic code below fetches the block for plane pl into
+ * currSlice->mb_pred[pl] and, where the chroma-format tests allow it,
+ * predicts the chroma planes into mb_pred[1] / mb_pred[2].
+ *
+ * currMB         macroblock being predicted; supplies p_Vid, p_Slice and block coordinates
+ * pl             colour plane passed to get_block_luma and used as the mb_pred index
+ * dec_picture    picture being decoded; motion data and chroma_format_idc are read from it
+ * pred_dir       0 or 1 selects a single motion list, 2 selects bi-prediction
+ * i, j           block offsets added to block_x / block_y; shifted left by 2 to index mb_pred
+ * list_offset    added to the list number when indexing p_Vid->listX
+ * curr_mb_field  when non-zero, reference indices are halved for the weight-table
+ *                lookup under the explicit-weighting conditions tested below
+ */
+void perform_mc8x8(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int i, int j, int list_offset, int curr_mb_field)
+{
+ if (dec_picture->chroma_format_idc == YUV420)
+ {
+ perform_mc8x8_YUV420(currMB, dec_picture, pred_dir, i, j, list_offset, curr_mb_field);
+ }
+ else
+ {
+ VideoParameters *p_Vid = currMB->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps; // NOTE(review): not referenced again in this function
+
+ Slice *currSlice = currMB->p_Slice;
+
+ static const int mv_mul = 16; // 4 * 4 -- NOTE(review): never referenced in this function
+ // i4/j4 index the motion field; ioff/joff (offset * 4) index mb_pred.
+ int i4 = currMB->block_x + i;
+ int j4 = currMB->block_y + j;
+ int ioff = (i << 2);
+ int joff = (j << 2);
+ // pred_dir: 0/1 = single list, 2 = bi-prediction.
+ assert (pred_dir<=2);
+
+ if (pred_dir != 2)
+ {
+ //===== Single List Prediction =====
+ short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
+ short ref_idx_wp = ref_idx; // copy used only for weight-table lookups; may be halved below
+ short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
+ StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];
+
+ check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);
+ // Fetch the 8x8 block straight into mb_pred[pl] at (joff, ioff).
+ get_block_luma(currMB, pl, list, i4, currMB->block_y_aff + j, mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0, wp_offset;
+ if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))))
+ {
+ ref_idx_wp >>=1; // weight tables are indexed with the halved reference index in this case
+ }
+ // Component 0 of the weight/offset tables, with the luma denominator.
+ alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
+ wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];
+
+ opt_weighted_mc_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
+ }
+ // Chroma is predicted here only for formats other than YUV400 and YUV444.
+ if ((dec_picture->chroma_format_idc != YUV400) && (dec_picture->chroma_format_idc != YUV444) )
+ { // YUV420 or YUV422 (this else-branch is not taken for YUV420, which is dispatched at the top)
+ int uv;
+ // Width is fixed at 4; vertical offset/size depend on mb_cr_size_y.
+ int ioff_cr = ioff >> 1;
+ int joff_cr = (p_Vid->mb_cr_size_y == MB_BLOCK_SIZE) ? joff : joff >> 1;
+ int block_size_y_cr = p_Vid->mb_cr_size_y == MB_BLOCK_SIZE ? 8 : 4;
+ // No chroma_vector_adjustment is applied on this path.
+ short mv_cr[2] = {mv_array[0], mv_array[1] };
+ get_block_chroma(currMB, list, i4, currMB->block_y_aff + j, mv_cr, 4, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+
+ for(uv=0;uv<2;uv++) // uv+1 selects mb_pred[1] / mb_pred[2] and weight component 1 / 2
+ {
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
+ int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];
+
+ weighted_mc_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv + 1][joff_cr][ioff_cr], block_size_y_cr, 4, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
+ }
+ }
+ }
+ }
+ else
+ {
+ //===== BI-PREDICTION =====
+ __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2]; // scratch: [0] holds the list-0 block, later [0]/[1] hold list-0 chroma
+ short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
+ short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;
+ // *_refframe selects the reference picture; *_ref_idx is a copy that may be halved for weight lookups.
+ short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
+ short l0_ref_idx = l0_refframe;
+ short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
+ short l1_ref_idx = l1_refframe;
+
+ check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
+ check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);
+ // The IPP path below is used only for PLANE_Y when neither of the first two conditions holds.
+ if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid) || pl!=PLANE_Y)
+ {
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, l0_mv_array, 8, 8, tmp_block_l0[0]);
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, l1_mv_array, 8, 8, (h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff]);
+ }
+ else
+ {
+ VideoImage *cur_imgY = p_Vid->listX[LIST_0 + list_offset][l0_refframe]->imgY;
+ IppVCInterpolateBlock_8u block_data;
+ // One descriptor is filled for list 0, then partially overwritten for list 1.
+ block_data.pSrc[0] = cur_imgY->base_address;
+ block_data.srcStep = cur_imgY->stride;
+ block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]);
+ block_data.dstStep = sizeof(tmp_block_l0[0][0]); // size of one first-level element of the scratch block (presumably one pixel row); also used for the mb_pred destination below
+ block_data.sizeFrame.width = dec_picture->size_x;
+ block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y; // half height when mb_field is set for this macroblock
+ block_data.sizeBlock.width = 8;
+ block_data.sizeBlock.height = 8;
+ block_data.pointBlockPos.x = i4 << 2;
+ block_data.pointBlockPos.y = (currMB->block_y_aff + j) << 2;
+ block_data.pointVector.x = l0_mv_array[0];
+ block_data.pointVector.y = l0_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); // list 0 -> tmp_block_l0[0]
+ cur_imgY = p_Vid->listX[LIST_1 + list_offset][l1_refframe]->imgY;
+ block_data.pSrc[0] = cur_imgY->base_address;
+ block_data.srcStep = cur_imgY->stride;
+ block_data.pDst[0] = &currSlice->mb_pred[pl][joff][ioff];
+ block_data.pointVector.x = l1_mv_array[0];
+ block_data.pointVector.y = l1_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data); // list 1 -> mb_pred[pl]; frame/block geometry and dstStep reused from the first call
+ }
+ // Combine the two predictions into mb_pred[pl], weighted or unweighted.
+ if(currSlice->apply_weights)
+ {
+ int alpha_l0, alpha_l1, wp_offset;
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0; // list_offset applies to the weight tables only when weighted_bipred_idc is 2
+
+ // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
+ if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field)
+ {
+ l0_ref_idx >>=1;
+ l1_ref_idx >>=1;
+ }
+ // Offset is the rounded average of the two per-list offsets; denominator is luma_log2_weight_denom + 1.
+ alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1);
+
+ opt_weighted_bi_prediction8x8((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[pl][joff][ioff], tmp_block_l0[0], 8, 8);
+ }
+ // Bi-predictive chroma is handled here for YUV422 only: 4 wide, 8 tall, vertical offset not halved.
+ if (dec_picture->chroma_format_idc == YUV422)
+ {
+ int uv;
+ int ioff_cr = ioff >> 1;
+ int joff_cr = joff;
+ // No chroma_vector_adjustment is applied on this path.
+ short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1]};
+ short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1]};
+
+ get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff + j, mv_cr1, 4, 8, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
+ get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff + j, mv_cr2, 4, 8, currSlice->mb_pred[1], currSlice->mb_pred[2], ioff_cr, joff_cr);
+
+ for(uv=0;uv<2;uv++) // combine scratch block uv with mb_pred[uv+1]
+ {
+
+ if(currSlice->apply_weights)
+ {
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+ // l0_ref_idx / l1_ref_idx are used as left by the weighting block above (possibly halved).
+ int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1);
+
+ weighted_bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], 8, 4, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction((h264_imgpel_macroblock_row_t *)&currSlice->mb_pred[uv+1][joff_cr][ioff_cr], tmp_block_l0[uv], 8, 4);
+ }
+ }
+ }
+ }
+ }
+}
+
+
+static void __forceinline perform_mc16x16_YUV420(Macroblock *currMB, StorablePicture *dec_picture, int pred_dir, int list_offset, int curr_mb_field)
+{
+ VideoParameters *p_Vid = currMB->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ Slice *currSlice = currMB->p_Slice;
+
+ static const int mv_mul = 16; // 4 * 4
+
+ int i4 = currMB->block_x;
+ int j4 = currMB->block_y;
+
+ assert (pred_dir<=2);
+
+ if (pred_dir != 2)
+ {
+ //===== Single List Prediction =====
+ short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
+ short ref_idx_wp = ref_idx;
+ short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
+ StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];
+
+ check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);
+
+ get_block_luma(currMB, PLANE_Y, list, i4, currMB->block_y_aff, mv_array, 16, 16, currSlice->mb_pred[0]);
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0, wp_offset;
+ if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))))
+ {
+ ref_idx_wp >>=1;
+ }
+
+ alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
+ wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];
+
+ opt_weighted_mc_prediction16x16(currSlice->mb_pred[0], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
+ }
+ {
+ int uv;
+ short mv_cr[2] = {mv_array[0], mv_array[1] + list->chroma_vector_adjustment };
+ get_block_chroma(currMB, list, i4, currMB->block_y_aff, mv_cr, 8, 8, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0);
+
+ for(uv=0;uv<2;uv++)
+ {
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
+ int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];
+
+ opt_weighted_mc_prediction8x8(currSlice->mb_pred[uv + 1], alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
+ }
+ }
+ }
+ }
+ else
+ {
+ //===== BI-PREDICTION =====
+ __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2];
+ short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
+ short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;
+
+ short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
+ short l0_ref_idx = l0_refframe;
+ short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
+ short l1_ref_idx = l1_refframe;
+
+ check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
+ check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);
+
+ if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid))
+ {
+ get_block_luma(currMB, PLANE_Y, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff, l0_mv_array, 16, 16, tmp_block_l0[0]);
+ get_block_luma(currMB, PLANE_Y, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff, l1_mv_array, 16, 16, currSlice->mb_pred[0]);
+ }
+ else
+ {
+ VideoImage *cur_imgY = p_Vid->listX[LIST_0 + list_offset][l0_refframe]->imgY;
+ IppVCInterpolateBlock_8u block_data;
+
+ block_data.pSrc[0] = cur_imgY->base_address;
+ block_data.srcStep = cur_imgY->stride;
+ block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]);
+ block_data.dstStep = sizeof(tmp_block_l0[0][0]);
+ block_data.sizeFrame.width = dec_picture->size_x;
+ block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y;
+ block_data.sizeBlock.width = 16;
+ block_data.sizeBlock.height = 16;
+ block_data.pointBlockPos.x = i4 << 2;
+ block_data.pointBlockPos.y = currMB->block_y_aff<< 2;
+ block_data.pointVector.x = l0_mv_array[0];
+ block_data.pointVector.y = l0_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data);
+ cur_imgY = p_Vid->listX[LIST_1 + list_offset][l1_refframe]->imgY;
+ block_data.pSrc[0] = cur_imgY->base_address;
+ block_data.srcStep = cur_imgY->stride;
+ block_data.pDst[0] = (Ipp8u *)(currSlice->mb_pred[0]);
+ block_data.pointVector.x = l1_mv_array[0];
+ block_data.pointVector.y = l1_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data);
+ }
+
+ if(currSlice->apply_weights)
+ {
+ int alpha_l0, alpha_l1, wp_offset;
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+
+ // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
+ if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field)
+ {
+ l0_ref_idx >>=1;
+ l1_ref_idx >>=1;
+ }
+
+ alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1);
+
+ opt_weighted_bi_prediction16x16(currSlice->mb_pred[0], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction(currSlice->mb_pred[0], tmp_block_l0[0], 16, 16);
+ }
+
+ {
+ int uv;
+
+ short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment };
+ short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment };
+
+ get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff , mv_cr1, 8, 8, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
+ get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff , mv_cr2, 8, 8, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0);
+
+ for(uv=0;uv<2;uv++)
+ {
+ if(currSlice->apply_weights)
+ {
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+
+ int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1);
+
+ opt_weighted_bi_prediction8x8(currSlice->mb_pred[uv+1], tmp_block_l0[uv], alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction(currSlice->mb_pred[uv+1], tmp_block_l0[uv], 8, 8);
+ }
+ }
+ }
+
+ }
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Motion-compensated prediction for a 16x16 macroblock partition.
+ *    YUV420 pictures are handed to perform_mc16x16_YUV420(); for all
+ *    other chroma formats the plane selected by pl is predicted into
+ *    currSlice->mb_pred[pl] and, when the picture is YUV422, the two
+ *    chroma planes are predicted into mb_pred[1] and mb_pred[2].
+ *
+ * \param currMB
+ *    macroblock being predicted (supplies p_Vid, p_Slice and block position)
+ * \param pl
+ *    colour plane passed to get_block_luma and used to index mb_pred
+ * \param dec_picture
+ *    picture whose motion field (mv / ref_idx) is read
+ * \param pred_dir
+ *    0 or 1 selects single-list prediction from that list, 2 selects
+ *    bi-prediction (asserted to be <= 2)
+ * \param list_offset
+ *    offset added to the list number when indexing p_Vid->listX
+ * \param curr_mb_field
+ *    when non-zero and explicit weighting applies, reference indices are
+ *    halved before the weight tables are read
+ ************************************************************************
+ */
+void perform_mc16x16(Macroblock *currMB, ColorPlane pl, StorablePicture *dec_picture, int pred_dir, int list_offset, int curr_mb_field)
+{
+ if (dec_picture->chroma_format_idc == YUV420)
+ {
+ perform_mc16x16_YUV420(currMB, dec_picture, pred_dir, list_offset, curr_mb_field);
+ }
+ else
+ {
+ VideoParameters *p_Vid = currMB->p_Vid;
+ seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+ Slice *currSlice = currMB->p_Slice;
+
+ // NOTE(review): mv_mul is not referenced anywhere in this function
+ static const int mv_mul = 16; // 4 * 4
+
+ int i4 = currMB->block_x;
+ int j4 = currMB->block_y;
+
+ assert (pred_dir<=2);
+
+ if (pred_dir != 2)
+ {
+ //===== Single List Prediction =====
+ short ref_idx = dec_picture->motion.motion[pred_dir][j4][i4].ref_idx;
+ // ref_idx_wp is the index used for the weight tables; it may be halved below
+ short ref_idx_wp = ref_idx;
+ short *mv_array = dec_picture->motion.motion[pred_dir][j4][i4].mv;
+ StorablePicture *list = p_Vid->listX[list_offset + pred_dir][ref_idx];
+
+ check_motion_vector_range(p_Vid, mv_array[0], mv_array[1]);
+
+ get_block_luma(currMB, pl, list, i4, currMB->block_y_aff, mv_array, 16, 16, currSlice->mb_pred[pl]);
+
+ if (currSlice->apply_weights)
+ {
+ int alpha_l0, wp_offset;
+ if (curr_mb_field && ((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))))
+ {
+ ref_idx_wp >>=1;
+ }
+
+ alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][0];
+ wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][0];
+
+ opt_weighted_mc_prediction16x16(currSlice->mb_pred[pl], alpha_l0, wp_offset, currSlice->luma_log2_weight_denom);
+ }
+
+ if (dec_picture->chroma_format_idc == YUV422)
+ {
+ int uv;
+ // the motion vector is passed to the chroma fetch unchanged (no chroma_vector_adjustment here)
+ short mv_cr[2] = {mv_array[0], mv_array[1]};
+ get_block_chroma(currMB, list, i4, currMB->block_y_aff, mv_cr, 8, 16, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0);
+
+ for(uv=0;uv<2;uv++)
+ {
+ if (currSlice->apply_weights)
+ {
+ // weight table components 1 and 2 are used for the two chroma planes
+ int alpha_l0 = currSlice->wp_weight[pred_dir][ref_idx_wp][uv + 1];
+ int wp_offset = currSlice->wp_offset[pred_dir][ref_idx_wp][uv + 1];
+
+ weighted_mc_prediction(currSlice->mb_pred[uv + 1], 16, 8, alpha_l0, wp_offset, currSlice->chroma_log2_weight_denom);
+ }
+ }
+ }
+ }
+ else
+ {
+ //===== BI-PREDICTION =====
+ // tmp_block_l0 holds the list 0 prediction; the list 1 prediction goes straight into mb_pred
+ __declspec(align(32)) h264_imgpel_macroblock_t tmp_block_l0[2];
+ short *l0_mv_array = dec_picture->motion.motion[LIST_0][j4][i4].mv;
+ short *l1_mv_array = dec_picture->motion.motion[LIST_1][j4][i4].mv;
+
+ // *_refframe index the reference lists; *_ref_idx index the weight tables and may be halved below
+ short l0_refframe = dec_picture->motion.motion[LIST_0][j4][i4].ref_idx;
+ short l0_ref_idx = l0_refframe;
+ short l1_refframe = dec_picture->motion.motion[LIST_1][j4][i4].ref_idx;
+ short l1_ref_idx = l1_refframe;
+
+ check_motion_vector_range(p_Vid, l0_mv_array[0], l0_mv_array[1]);
+ check_motion_vector_range(p_Vid, l1_mv_array[0], l1_mv_array[1]);
+
+ // get_block_luma is used before the recovery point, for independent planes and for any plane other than PLANE_Y;
+ // otherwise the IPP interpolation routine is called directly
+ if (p_Vid->framepoc < p_Vid->recovery_poc || IS_INDEPENDENT(p_Vid) || pl!=PLANE_Y)
+ {
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff, l0_mv_array, 16, 16, tmp_block_l0[0]);
+ get_block_luma(currMB, pl, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff, l1_mv_array, 16, 16, currSlice->mb_pred[pl]);
+ }
+ else
+ {
+ VideoImage *cur_imgY = p_Vid->listX[LIST_0 + list_offset][l0_refframe]->imgY;
+ IppVCInterpolateBlock_8u block_data;
+
+ // list 0 interpolation into tmp_block_l0[0]
+ block_data.pSrc[0] = cur_imgY->base_address;
+ block_data.srcStep = cur_imgY->stride;
+ block_data.pDst[0] = (Ipp8u *)(tmp_block_l0[0]);
+ block_data.dstStep = sizeof(tmp_block_l0[0][0]);
+ block_data.sizeFrame.width = dec_picture->size_x;
+ block_data.sizeFrame.height = (dec_picture->motion.mb_field[currMB->mbAddrX]) ? (dec_picture->size_y >> 1): dec_picture->size_y;
+ block_data.sizeBlock.width = 16;
+ block_data.sizeBlock.height = 16;
+ block_data.pointBlockPos.x = i4 << 2;
+ block_data.pointBlockPos.y = currMB->block_y_aff<< 2;
+ block_data.pointVector.x = l0_mv_array[0];
+ block_data.pointVector.y = l0_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data);
+ // list 1 interpolation: only source, destination and vector are changed, the remaining fields are reused
+ cur_imgY = p_Vid->listX[LIST_1 + list_offset][l1_refframe]->imgY;
+ block_data.pSrc[0] = cur_imgY->base_address;
+ block_data.srcStep = cur_imgY->stride;
+ block_data.pDst[0] = (Ipp8u *)(currSlice->mb_pred[pl]);
+ block_data.pointVector.x = l1_mv_array[0];
+ block_data.pointVector.y = l1_mv_array[1];
+ ippiInterpolateLumaBlock_H264_8u_P1R(&block_data);
+ }
+
+ if(currSlice->apply_weights)
+ {
+ int alpha_l0, alpha_l1, wp_offset;
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+
+ // This code existed in the original. Seems pointless but copying it here for reference and in case temporal direct breaks.
+ // if (mv_mode==0 && currSlice->direct_spatial_mv_pred_flag==0 ) l1_ref_idx=0;
+ if (((p_Vid->active_pps->weighted_pred_flag&&(p_Vid->type==P_SLICE|| p_Vid->type == SP_SLICE))||
+ (p_Vid->active_pps->weighted_bipred_idc==1 && (p_Vid->type==B_SLICE))) && curr_mb_field)
+ {
+ l0_ref_idx >>=1;
+ l1_ref_idx >>=1;
+ }
+
+ alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][0];
+ // combined offset is the rounded average of the two per-list offsets
+ wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][0] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][0] + 1) >>1);
+
+ opt_weighted_bi_prediction16x16(currSlice->mb_pred[pl], tmp_block_l0[0], alpha_l0, alpha_l1, wp_offset, (currSlice->luma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction(currSlice->mb_pred[pl], tmp_block_l0[0], 16, 16);
+ }
+
+ if (dec_picture->chroma_format_idc == YUV422)
+ { // YUV422
+ int uv;
+
+ int block_size_y_cr = p_Vid->mb_cr_size_y;
+
+ // NOTE(review): the adjustment is only added when active_sps->chroma_format_idc == 1, while this branch
+ // requires dec_picture->chroma_format_idc == YUV422; if both always agree the adjustment is always 0 -- confirm
+ short mv_cr1[2] = {l0_mv_array[0], l0_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_0 + list_offset][l0_refframe]->chroma_vector_adjustment : 0) };
+ short mv_cr2[2] = {l1_mv_array[0], l1_mv_array[1] + ((active_sps->chroma_format_idc == 1)? p_Vid->listX[LIST_1 + list_offset][l1_refframe]->chroma_vector_adjustment : 0) };
+
+ // tmp_block_l0 is reused here: [0] and [1] receive the two list 0 chroma planes
+ get_block_chroma(currMB, p_Vid->listX[LIST_0 + list_offset][l0_refframe], i4, currMB->block_y_aff , mv_cr1, 8, block_size_y_cr, tmp_block_l0[0], tmp_block_l0[1], 0, 0);
+ get_block_chroma(currMB, p_Vid->listX[LIST_1 + list_offset][l1_refframe], i4, currMB->block_y_aff , mv_cr2, 8, block_size_y_cr, currSlice->mb_pred[1], currSlice->mb_pred[2], 0, 0);
+
+
+ for(uv=0;uv<2;uv++)
+ {
+ if(currSlice->apply_weights)
+ {
+ int wt_list_offset = (p_Vid->active_pps->weighted_bipred_idc==2)? list_offset : 0;
+
+ // l0_ref_idx / l1_ref_idx keep any halving applied in the luma weighting step above
+ int alpha_l0 = currSlice->wbp_weight[LIST_0 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int alpha_l1 = currSlice->wbp_weight[LIST_1 + wt_list_offset][l0_ref_idx][l1_ref_idx][uv + 1];
+ int wp_offset = ((currSlice->wp_offset [LIST_0 + wt_list_offset][l0_ref_idx][uv + 1] + currSlice->wp_offset[LIST_1 + wt_list_offset][l1_ref_idx][uv + 1] + 1) >>1);
+
+ weighted_bi_prediction(currSlice->mb_pred[uv+1], tmp_block_l0[uv], block_size_y_cr, 8, alpha_l0, alpha_l1, wp_offset, (currSlice->chroma_log2_weight_denom + 1));
+ }
+ else
+ {
+ bi_prediction(currSlice->mb_pred[uv+1], tmp_block_l0[uv], block_size_y_cr, 8);
+ }
+ }
+ }
+ }
+ }
+}
+
diff --git a/Src/h264dec/ldecod/src/meminput.c b/Src/h264dec/ldecod/src/meminput.c
new file mode 100644
index 00000000..54465340
--- /dev/null
+++ b/Src/h264dec/ldecod/src/meminput.c
@@ -0,0 +1,134 @@
+#include "global.h"
+#include "meminput.h"
+
+/*!
+************************************************************************
+* \brief
+*    Allocates the zero-initialised memory input state and stores it in
+*    p_Vid->mem_input. An allocation failure is reported through error().
+************************************************************************
+*/
+void malloc_mem_input(VideoParameters *p_Vid)
+{
+  p_Vid->mem_input = (memory_input_t *) calloc(1, sizeof(memory_input_t));
+  if (p_Vid->mem_input != NULL)
+    return;
+
+  snprintf(errortext, ET_SIZE, "Memory allocation for memory input failed");
+  error(errortext,100);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Releases the memory input state and clears p_Vid->mem_input.
+************************************************************************
+*/
+void free_mem_input(VideoParameters *p_Vid)
+{
+  memory_input_t *input = p_Vid->mem_input;
+
+  p_Vid->mem_input = NULL;
+  free(input);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Returns the byte at the current read position of the user buffer
+*    and advances the read position by one. No bounds check is made.
+************************************************************************
+*/
+static inline uint8_t getfbyte(memory_input_t *mem_input)
+{
+  const uint8_t value = mem_input->user_buffer[mem_input->user_buffer_read];
+
+  mem_input->user_buffer_read++;
+  return value;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Tests whether a start code begins at Buf, i.e. whether Buf holds
+ *    zeros_in_startcode bytes of 0x00 immediately followed by 0x01.
+ *
+ * \return
+ *    1 if the start code is present \n
+ *    0 otherwise
+ *
+ * \param Buf
+ *    pointer to the byte-aligned position to test; at least
+ *    zeros_in_startcode + 1 bytes may be read
+ * \param zeros_in_startcode
+ *    number of 0x00 bytes that must precede the 0x01 byte
+ ************************************************************************
+ */
+static inline int FindStartCode (unsigned char *Buf, int zeros_in_startcode)
+{
+  int pos = 0;
+
+  // every leading byte has to be zero
+  while (pos < zeros_in_startcode)
+  {
+    if (Buf[pos] != 0)
+      return 0;
+    pos++;
+  }
+
+  // the byte right after the zero run has to be 0x01
+  return (Buf[pos] == 1) ? 1 : 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Copies the NAL unit queued in the memory input (user_buffer /
+ *    user_buffer_size) into nalu->buf, sets nalu->len and decodes the
+ *    one-byte NAL header (forbidden_bit, nal_reference_idc,
+ *    nal_unit_type). The queued buffer is always consumed: user_buffer
+ *    is cleared whether or not the NALU is accepted.
+ *
+ * \return
+ *    1 if a NALU was copied and should be decoded \n
+ *    0 if no buffer is queued, or the NALU is dropped (disposable NALU
+ *      while skip_b_frames is set, or non-IDR NALU while resetting) \n
+ *    -1 if the queued data plus the zero padding does not fit into the
+ *      nalu->max_size bytes of nalu->buf
+ *
+ * \note
+ *    Other fields of nalu (e.g. startcodeprefix_len) are not written here.
+ ************************************************************************
+ */
+int GetMemoryNALU (VideoParameters *p_Vid, NALU_t *nalu)
+{
+  memory_input_t *mem_input = p_Vid->mem_input;
+  // number of bytes cleared behind the payload; assumes memzero16 writes 16 bytes, as its name suggests
+  const size_t zero_padding = 16;
+
+  if (!mem_input->user_buffer)
+    return 0;
+
+  // Reject input that would overrun nalu->buf (payload + zero padding).
+  // A negative size converts to a huge size_t and is rejected as well.
+  if ((size_t)nalu->max_size < zero_padding ||
+      (size_t)mem_input->user_buffer_size > (size_t)nalu->max_size - zero_padding)
+  {
+    mem_input->user_buffer = 0; // consume the buffer so it is not retried
+    return -1;
+  }
+
+  nalu->len = mem_input->user_buffer_size;
+  memcpy(nalu->buf, mem_input->user_buffer, nalu->len);
+  memzero16(nalu->buf+nalu->len); // add some extra 0's to the end
+
+  // first byte is the NAL header: 1 bit forbidden, 2 bits ref_idc, 5 bits type
+  nalu->forbidden_bit = (*(nalu->buf) >> 7) & 1;
+  nalu->nal_reference_idc = (NalRefIdc) ((*(nalu->buf) >> 5) & 3);
+  nalu->nal_unit_type = (NaluType) ((*(nalu->buf)) & 0x1f);
+  nalu->lost_packets = 0;
+  mem_input->user_buffer = 0;
+
+  if (mem_input->skip_b_frames && nalu->nal_reference_idc == NALU_PRIORITY_DISPOSABLE)
+    return 0;
+
+  // while resetting, everything up to the next IDR NALU is discarded
+  if (mem_input->resetting && nalu->nal_unit_type != NALU_TYPE_IDR)
+    return 0;
+
+  mem_input->resetting = 0;
+
+  return 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Placeholder for opening the input. The memory input needs no setup
+ *    here, so this function does nothing.
+ * \param p_Vid
+ *    unused
+ * \param fn
+ *    unused
+ * \return
+ *    none
+ ************************************************************************
+ */
+void OpenMemory(VideoParameters *p_Vid, const char *fn)
+{
+  (void)p_Vid;
+  (void)fn;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Placeholder for closing the input. Nothing is released here, so
+ *    this function does nothing.
+ * \param p_Vid
+ *    unused
+ ************************************************************************
+ */
+void CloseMemory(VideoParameters *p_Vid)
+{
+  (void)p_Vid;
+}
+
diff --git a/Src/h264dec/ldecod/src/nal.c b/Src/h264dec/ldecod/src/nal.c
new file mode 100644
index 00000000..73c39474
--- /dev/null
+++ b/Src/h264dec/ldecod/src/nal.c
@@ -0,0 +1,123 @@
+
+/*!
+ ************************************************************************
+ * \file nal.c
+ *
+ * \brief
+ * Converts Encapsulated Byte Sequence Packets (EBSP) to Raw Byte
+ * Sequence Packets (RBSP), and then onto String Of Data Bits (SODB)
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Shankar L. Regunathan <shanre@microsoft.com>
+ * - Tobias Oelbaum <oelbaum@drehvial.de>
+************************************************************************
+ */
+
+#include "contributors.h"
+#include "global.h"
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Converts RBSP to string of data bits by locating the byte that
+ *    holds the trailing stop bit (the last non-zero byte).
+ * \param streamBuffer
+ *    pointer to buffer containing data
+ * \param last_byte_pos
+ *    number of bytes to consider; streamBuffer[last_byte_pos-1] is the
+ *    last byte containing data
+ * \return
+ *    updated last_byte_pos: trailing all-zero bytes are dropped, so
+ *    streamBuffer[result-1] is the byte containing the stop bit. The
+ *    stop bit itself is kept. Returns 0 (after printing a diagnostic)
+ *    when the buffer is empty or contains only zero bytes.
+ ************************************************************************
+ */
+int RBSPtoSODB(byte *streamBuffer, int last_byte_pos)
+{
+  // Skip trailing bytes without any set bit. The bound check comes first
+  // so that streamBuffer[-1] is never read.
+  while (last_byte_pos > 0 && streamBuffer[last_byte_pos-1] == 0)
+  {
+    last_byte_pos -= 1;
+  }
+
+  if (last_byte_pos <= 0)
+  {
+    printf(" Panic: All zero data sequence in RBSP \n");
+    return 0;
+  }
+
+  // We keep the stop bit for now
+  return(last_byte_pos);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Converts Encapsulated Byte Sequence Packets to RBSP in place by
+*    removing the emulation prevention byte of every 0x000003 sequence.
+*    Byte 0 (NAL unit header) is left untouched.
+* \param streamBuffer
+*    pointer to data stream
+* \param end_bytepos
+*    size of data stream
+* \return
+*    number of bytes after conversion; end_bytepos unchanged if it is
+*    smaller than 1; -1 if the byte following 0x000003 is larger than 0x03
+************************************************************************/
+
+// TODO: benski> optimize using BitScanReverse
+int EBSPtoRBSP(byte *streamBuffer, int end_bytepos)
+{
+  const int header_bytes = 1;   // conversion starts behind the NAL header byte
+  int src;
+  int dst = header_bytes;
+  int zero_run = 0;             // consecutive 0x00 bytes written so far
+  byte cur;
+
+  if (end_bytepos < header_bytes)
+    return end_bytepos;
+
+  for (src = header_bytes; src < end_bytepos; src++)
+  {
+    if (zero_run == ZEROBYTES_SHORTSTARTCODE)
+    {
+      //in NAL unit, 0x000000, 0x000001 or 0x000002 shall not occur at any byte-aligned position
+      if (streamBuffer[src] < 0x03)
+        return dst;//-1;
+
+      if (streamBuffer[src] == 0x03)
+      {
+        //if cabac_zero_word is used, the final byte of this NAL unit(0x03) is discarded, and the last two bytes of RBSP must be 0x0000
+        if (src == end_bytepos-1)
+          return dst;
+        //check the 4th byte after 0x000003
+        if (streamBuffer[src+1] > 0x03)
+          return -1;
+
+        // drop the emulation prevention byte
+        src++;
+        zero_run = 0;
+      }
+    }
+
+    cur = streamBuffer[src];
+    streamBuffer[dst] = cur;
+    dst++;
+    zero_run = (cur == 0x00) ? zero_run + 1 : 0;
+  }
+
+  return dst;
+}
diff --git a/Src/h264dec/ldecod/src/nalu.c b/Src/h264dec/ldecod/src/nalu.c
new file mode 100644
index 00000000..e70617e3
--- /dev/null
+++ b/Src/h264dec/ldecod/src/nalu.c
@@ -0,0 +1,162 @@
+
+/*!
+ ************************************************************************
+ * \file nalu.c
+ *
+ * \brief
+ * Decoder NALU support functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ ************************************************************************
+ */
+
+#include "global.h"
+#include "nalu.h"
+#include "memalloc.h"
+#include "meminput.h"
+
+void OpenMemory(VideoParameters *p_Vid, const char *fn);
+void CloseMemory(VideoParameters *p_Vid);
+int GetMemoryNALU (VideoParameters *p_Vid, NALU_t *nalu);
+
+/*!
+*************************************************************************************
+* \brief
+*    Initialize bitstream reading structure: allocates the memory input
+*    state and the NALU buffer (MAX_CODED_FRAME_SIZE bytes) of p_Vid.
+*    A failed NALU allocation is reported through error().
+*
+* \param
+*    p_Vid: Imageparameter information
+*
+*************************************************************************************
+*/
+void initBitsFile (VideoParameters *p_Vid)
+{
+  malloc_mem_input(p_Vid);
+  p_Vid->nalu = AllocNALU(MAX_CODED_FRAME_SIZE);
+
+  // AllocNALU returns a null pointer on failure; do not continue with it
+  if (p_Vid->nalu == NULL)
+  {
+    error ("initBitsFile: memory allocation for NALU failed", 100);
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Converts the payload of a NALU to an RBSP in place and stores the
+ *    result of EBSPtoRBSP in nalu->len.
+ *
+ * \param
+ *    nalu: nalu structure whose buf and len are converted
+ *
+ * \return
+ *    the updated nalu->len
+ *************************************************************************************
+ */
+
+static int NALUtoRBSP (NALU_t *nalu)
+{
+  int rbsp_len;
+
+  assert (nalu != NULL);
+
+  rbsp_len = EBSPtoRBSP (nalu->buf, nalu->len);
+  nalu->len = rbsp_len;
+
+  return nalu->len ;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Read the next NAL unit (with error handling): fetches it with
+*    GetMemoryNALU, runs the non-VCL zero_byte check and converts the
+*    payload to RBSP.
+*
+* \param p_Vid
+*    decoder state that owns the memory input
+* \param nalu
+*    NALU structure to be filled
+*
+* \return
+*    0 if GetMemoryNALU delivered nothing, otherwise nalu->len after
+*    conversion. Failures are reported through error() with codes
+*    601 (read error), 602 (invalid emulation prevention) and
+*    603 (forbidden_bit set).
+************************************************************************
+*/
+int read_next_nalu(VideoParameters *p_Vid, NALU_t *nalu)
+{
+  int ret;
+
+  ret = GetMemoryNALU(p_Vid, nalu);
+
+  if (ret < 0)
+  {
+    error ("Error while getting the next NALU, exit\n", 601);
+  }
+  if (ret == 0)
+  {
+    return 0;
+  }
+
+  //In some cases, zero_byte shall be present. If current NALU is a VCL NALU, we can't tell
+  //whether it is the first VCL NALU at this point, so only non-VCL NAL unit is checked here.
+  CheckZeroByteNonVCL(p_Vid, nalu);
+
+  ret = NALUtoRBSP(nalu);
+
+  if (ret < 0)
+  {
+    error ("Invalid startcode emulation prevention found.", 602);
+  }
+
+  // Got a NALU
+  if (nalu->forbidden_bit)
+  {
+    error ("Found NALU with forbidden_bit set, bit error?", 603);
+  }
+
+  return nalu->len;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Access unit bookkeeping for non-VCL NAL units: updates
+*    p_Vid->NALUCount and p_Vid->LastAccessUnitExists and determines
+*    whether zero_byte would be required. A missing zero_byte is only
+*    detected, it is not reported.
+************************************************************************
+*/
+void CheckZeroByteNonVCL(VideoParameters *p_Vid, NALU_t *nalu)
+{
+  const int type = nalu->nal_unit_type;
+  int zero_byte_expected;
+  int may_start_access_unit;
+
+  //This function deals only with non-VCL NAL units
+  if (type >= 1 && type <= 5)
+    return;
+
+  //for SPS and PPS, zero_byte shall exist
+  zero_byte_expected = (type == NALU_TYPE_SPS || type == NALU_TYPE_PPS);
+
+  //check the possibility of the current NALU to be the start of a new access unit, according to 7.4.1.2.3
+  may_start_access_unit = (type == NALU_TYPE_AUD || type == NALU_TYPE_SPS ||
+                           type == NALU_TYPE_PPS || type == NALU_TYPE_SEI ||
+                           (type >= 13 && type <= 18));
+  if (may_start_access_unit && p_Vid->LastAccessUnitExists)
+  {
+    p_Vid->LastAccessUnitExists = 0; //deliver the last access unit to decoder
+    p_Vid->NALUCount = 0;
+  }
+
+  p_Vid->NALUCount++;
+
+  //for the first NAL unit in an access unit, zero_byte shall exists
+  if (p_Vid->NALUCount == 1)
+    zero_byte_expected = 1;
+
+  if (zero_byte_expected && nalu->startcodeprefix_len == 3)
+  {
+    // printf("Warning: zero_byte shall exist\n");
+    //because it is not a very serious problem, we do not exit here
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Access unit bookkeeping for VCL NAL units (types 1..5): updates
+*    p_Vid->NALUCount, sets p_Vid->LastAccessUnitExists and determines
+*    whether zero_byte would be required. A missing zero_byte is only
+*    detected, it is not reported.
+************************************************************************
+*/
+void CheckZeroByteVCL(VideoParameters *p_Vid, NALU_t *nalu)
+{
+  const int type = nalu->nal_unit_type;
+  int zero_byte_expected = 0;
+
+  //This function deals only with VCL NAL units
+  if (type < 1 || type > 5)
+    return;
+
+  if (p_Vid->LastAccessUnitExists)
+    p_Vid->NALUCount = 0;
+  p_Vid->NALUCount++;
+
+  //the first VCL NAL unit that is the first NAL unit after last VCL NAL unit indicates
+  //the start of a new access unit and hence the first NAL unit of the new access unit.
+  if (p_Vid->NALUCount == 1)
+    zero_byte_expected = 1;
+
+  p_Vid->LastAccessUnitExists = 1;
+
+  if (zero_byte_expected && nalu->startcodeprefix_len == 3)
+  {
+    //printf("warning: zero_byte shall exist\n");
+    //because it is not a very serious problem, we do not exit here
+  }
+}
diff --git a/Src/h264dec/ldecod/src/nalucommon.c b/Src/h264dec/ldecod/src/nalucommon.c
new file mode 100644
index 00000000..fe900722
--- /dev/null
+++ b/Src/h264dec/ldecod/src/nalucommon.c
@@ -0,0 +1,73 @@
+
+/*!
+ ************************************************************************
+ * \file nalucommon.c
+ *
+ * \brief
+ * Common NALU support functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ ************************************************************************
+ */
+
+#include "global.h"
+#include "nalu.h"
+#include "memalloc.h"
+#include <bfc/platform/types.h>
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates a zero-initialised NALU together with a zero-filled,
+ *    32-byte aligned data buffer.
+ *
+ * \param buffersize
+ *    size of NALU buffer in bytes; also stored in max_size
+ *
+ * \return
+ *    pointer to the new NALU, or a null pointer if either allocation failed
+ *************************************************************************************
+ */
+NALU_t *AllocNALU(int buffersize)
+{
+  NALU_t *nalu = (NALU_t*)calloc (1, sizeof (NALU_t));
+
+  if (nalu == NULL)
+    return NULL;
+
+  nalu->max_size = buffersize;
+  nalu->buf = (uint8_t *)_aligned_malloc(buffersize, 32);
+
+  if (nalu->buf == NULL)
+  {
+    // do not leak the header when the buffer cannot be allocated
+    free (nalu);
+    return NULL;
+  }
+
+  memset(nalu->buf, 0, buffersize);
+  return nalu;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Frees a NALU and its data buffer. A null pointer is ignored.
+ *
+ * \param n
+ *    NALU to be freed
+ *
+ *************************************************************************************
+ */
+void FreeNALU(NALU_t *n)
+{
+  if (n == NULL)
+    return;
+
+  if (n->buf != NULL)
+  {
+    // buf comes from _aligned_malloc and needs the matching release call
+    _aligned_free(n->buf);
+    n->buf = NULL;
+  }
+
+  free (n);
+}
diff --git a/Src/h264dec/ldecod/src/output.c b/Src/h264dec/ldecod/src/output.c
new file mode 100644
index 00000000..74576af0
--- /dev/null
+++ b/Src/h264dec/ldecod/src/output.c
@@ -0,0 +1,599 @@
+
+/*!
+ ************************************************************************
+ * \file output.c
+ *
+ * \brief
+ * Output an image and Trace support
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Suehring <suehring@hhi.de>
+ ************************************************************************
+ */
+
+#include "contributors.h"
+
+#include "global.h"
+#include "mbuffer.h"
+#include "image.h"
+#include "memalloc.h"
+#include "sei.h"
+
+static void write_out_picture(VideoParameters *p_Vid, StorablePicture *p);
+
+
+#if (PAIR_FIELDS_IN_OUTPUT)
+
+void clear_picture(VideoParameters *p_Vid, StorablePicture *p);
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes out the pending output picture if a field is buffered
+ *    (pending_output_state != FRAME), releases its imgY / imgUV buffers
+ *    and resets pending_output_state to FRAME.
+ * \param p_Vid
+ *    decoder state holding the pending output picture
+ ************************************************************************
+ */
+void flush_pending_output(VideoParameters *p_Vid)
+{
+  StorablePicture *pending;
+
+  if (p_Vid->pending_output_state != FRAME)
+    write_out_picture(p_Vid, p_Vid->pending_output);
+
+  pending = p_Vid->pending_output;
+
+  if (pending->imgY)
+  {
+    free_mem2Dpel (pending->imgY);
+    pending->imgY = NULL;
+  }
+
+  if (pending->imgUV)
+  {
+    free_mem3Dpel (pending->imgUV);
+    pending->imgUV = NULL;
+  }
+
+  p_Vid->pending_output_state = FRAME;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Copies the rows of one field from p into the pending output
+ *    picture: every second luma and chroma row, starting at row add.
+ * \param p_Vid
+ *    decoder state holding the pending output picture
+ * \param p
+ *    source picture
+ * \param add
+ *    first row to copy (0 or 1)
+ ************************************************************************
+ */
+static void copy_field_to_pending(VideoParameters *p_Vid, StorablePicture *p, int add)
+{
+  int i;
+
+  for (i=0; i<p_Vid->pending_output->size_y; i+=2)
+  {
+    memcpy(p_Vid->pending_output->imgY[(i+add)], p->imgY[(i+add)], p->size_x * sizeof(imgpel));
+  }
+  for (i=0; i<p_Vid->pending_output->size_y_cr; i+=2)
+  {
+    memcpy(p_Vid->pending_output->imgUV[0][(i+add)], p->imgUV[0][(i+add)], p->size_x_cr * sizeof(imgpel));
+    memcpy(p_Vid->pending_output->imgUV[1][(i+add)], p->imgUV[1][(i+add)], p->size_x_cr * sizeof(imgpel));
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes out a storable picture
+ *    If the picture is a field, the output buffers the picture and tries
+ *    to pair it with the next field:
+ *    - a FRAME flushes any pending field and is written directly;
+ *    - a field of the same parity as the pending one flushes the pending
+ *      field first and is then handled as a first field;
+ *    - a first field allocates the pending picture with the geometry of
+ *      p and copies its rows;
+ *    - a second field is merged into the pending picture and the pair is
+ *      flushed, unless size or cropping differ, in which case the pending
+ *      field is flushed and p is handled as a first field.
+ * \param p_Vid
+ *    decoder state holding the pending output picture
+ * \param p
+ *    Picture to be written
+ * \param real_structure
+ *    FRAME, TOP_FIELD or the other field parity
+ ************************************************************************
+ */
+void write_picture(VideoParameters *p_Vid, StorablePicture *p, int real_structure)
+{
+  int add;
+
+  if (real_structure==FRAME)
+  {
+    flush_pending_output(p_Vid);
+    write_out_picture(p_Vid, p);
+    return;
+  }
+  if (real_structure == p_Vid->pending_output_state)
+  {
+    // same parity twice: the pending field has no partner, flush it and start over
+    flush_pending_output(p_Vid);
+    write_picture(p_Vid, p, real_structure);
+    return;
+  }
+
+  // top field rows start at 0, the other field at row 1
+  add = (real_structure == TOP_FIELD) ? 0 : 1;
+
+  if (p_Vid->pending_output_state == FRAME)
+  {
+    p_Vid->pending_output->size_x = p->size_x;
+    p_Vid->pending_output->size_y = p->size_y;
+    p_Vid->pending_output->size_x_cr = p->size_x_cr;
+    p_Vid->pending_output->size_y_cr = p->size_y_cr;
+    p_Vid->pending_output->chroma_format_idc = p->chroma_format_idc;
+
+    p_Vid->pending_output->frame_mbs_only_flag = p->frame_mbs_only_flag;
+    p_Vid->pending_output->frame_cropping_flag = p->frame_cropping_flag;
+    if (p_Vid->pending_output->frame_cropping_flag)
+    {
+      p_Vid->pending_output->frame_cropping_rect_left_offset = p->frame_cropping_rect_left_offset;
+      p_Vid->pending_output->frame_cropping_rect_right_offset = p->frame_cropping_rect_right_offset;
+      p_Vid->pending_output->frame_cropping_rect_top_offset = p->frame_cropping_rect_top_offset;
+      p_Vid->pending_output->frame_cropping_rect_bottom_offset = p->frame_cropping_rect_bottom_offset;
+    }
+
+    get_mem2Dpel (&(p_Vid->pending_output->imgY), p_Vid->pending_output->size_y, p_Vid->pending_output->size_x);
+    get_mem3Dpel (&(p_Vid->pending_output->imgUV), 2, p_Vid->pending_output->size_y_cr, p_Vid->pending_output->size_x_cr);
+
+    clear_picture(p_Vid, p_Vid->pending_output);
+
+    // copy first field
+    copy_field_to_pending(p_Vid, p, add);
+    p_Vid->pending_output_state = real_structure;
+  }
+  else
+  {
+    if ( (p_Vid->pending_output->size_x!=p->size_x) || (p_Vid->pending_output->size_y!= p->size_y)
+      || (p_Vid->pending_output->frame_mbs_only_flag != p->frame_mbs_only_flag)
+      || (p_Vid->pending_output->frame_cropping_flag != p->frame_cropping_flag)
+      || ( p_Vid->pending_output->frame_cropping_flag &&
+           ( (p_Vid->pending_output->frame_cropping_rect_left_offset != p->frame_cropping_rect_left_offset)
+           ||(p_Vid->pending_output->frame_cropping_rect_right_offset != p->frame_cropping_rect_right_offset)
+           ||(p_Vid->pending_output->frame_cropping_rect_top_offset != p->frame_cropping_rect_top_offset)
+           ||(p_Vid->pending_output->frame_cropping_rect_bottom_offset != p->frame_cropping_rect_bottom_offset)
+           )
+         )
+       )
+    {
+      // the two fields cannot be paired: output the pending one alone
+      flush_pending_output(p_Vid);
+      write_picture (p_Vid, p, real_structure);
+      return;
+    }
+
+    // copy second field
+    copy_field_to_pending(p_Vid, p, add);
+
+    p_Vid->pending_output->time_code = p->time_code;
+    flush_pending_output(p_Vid);
+  }
+}
+
+#else
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes out a storable picture without doing any output
+ *    modifications; the picture is passed straight to write_out_picture.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param p
+ *    Picture to be written
+ * \param real_structure
+ *    real picture structure (not used by this variant)
+ ************************************************************************
+ */
+static void write_picture(VideoParameters *p_Vid, StorablePicture *p, int real_structure)
+{
+  (void)real_structure;
+
+  write_out_picture(p_Vid, p);
+}
+
+
+#endif
+
+/*!
+************************************************************************
+* \brief
+*    Hands a storable picture over for output.
+*
+*    No file is written here: the picture is passed on through
+*    out_storable_picture_add(). A temporary reference is held on the
+*    picture for the duration of that call.
+*
+* \param p_Vid
+*    image decoding parameters for current picture
+* \param p
+*    Picture to be handed over; a NULL picture is ignored
+************************************************************************
+*/
+static void write_out_picture(VideoParameters *p_Vid, StorablePicture *p)
+{
+  if (!p)
+    return;
+
+  // take a reference of our own so the picture stays alive while it is handed over
+  p->retain_count++;
+  out_storable_picture_add(p_Vid, p);
+  free_storable_picture(p_Vid, p); // release the reference we added above (out_storable_picture will add its own)
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate the frame store that buffers fields for direct output.
+ *    With PAIR_FIELDS_IN_OUTPUT enabled, a zeroed pending-output picture
+ *    is allocated as well.
+ ************************************************************************
+ */
+void init_out_buffer(VideoParameters *p_Vid)
+{
+#if (PAIR_FIELDS_IN_OUTPUT)
+  StorablePicture *pending;
+#endif
+
+  p_Vid->out_buffer = alloc_frame_store();
+
+#if (PAIR_FIELDS_IN_OUTPUT)
+  pending = calloc (sizeof(StorablePicture), 1);
+  p_Vid->pending_output = pending;
+  if (NULL == pending)
+    no_mem_exit("init_out_buffer");
+
+  // the pending picture starts out without any sample planes
+  pending->imgUV = NULL;
+  pending->imgY = NULL;
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Uninitialize output buffer for direct output.
+ *    Both pointers owned by this module are reset to NULL after their
+ *    storage has been released, so a later access cannot hit freed memory.
+ ************************************************************************
+ */
+void uninit_out_buffer(VideoParameters *p_Vid)
+{
+  free_frame_store(p_Vid, p_Vid->out_buffer);
+  p_Vid->out_buffer = NULL;
+#if (PAIR_FIELDS_IN_OUTPUT)
+  flush_pending_output(p_Vid);
+  free (p_Vid->pending_output);
+  p_Vid->pending_output = NULL; // do not leave a dangling pointer behind
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fill every sample of a picture with the per-component values from
+ *    p_Vid->dc_pred_value_comp[] (index 0 for luma, 1 and 2 for the two
+ *    chroma planes).
+ ************************************************************************
+ */
+void clear_picture(VideoParameters *p_Vid, StorablePicture *p)
+{
+  int row, col;
+  int plane;
+
+  // luma plane
+  for (row = 0; row < p->size_y; row++)
+  {
+    for (col = 0; col < p->size_x; col++)
+      p->imgY->img[row][col] = (imgpel) p_Vid->dc_pred_value_comp[0];
+  }
+
+  // chroma planes: both use the size_x_cr by size_y_cr dimensions
+  for (plane = 0; plane < 2; plane++)
+  {
+    for (row = 0; row < p->size_y_cr; row++)
+    {
+      for (col = 0; col < p->size_x_cr; col++)
+        p->imgUV[plane]->img[row][col] = (imgpel) p_Vid->dc_pred_value_comp[plane + 1];
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output a frame store that holds only one field. The missing field is
+ *    allocated, filled by clear_picture(), combined with the existing one
+ *    into fs->frame and the result is passed to write_picture().
+ *    On return the frame store is marked as holding both fields.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param fs
+ *    FrameStore that contains a single field
+ ************************************************************************
+ */
+static void write_unpaired_field(VideoParameters *p_Vid, FrameStore* fs)
+{
+  StorablePicture *present;  // the field already stored in fs
+  StorablePicture *filler;   // the freshly allocated opposite field
+
+  assert (fs->is_used<3);
+
+  if (fs->is_used & 0x01)
+  {
+    // only a top field is stored: pair it with a cleared bottom field
+    present = fs->top_field;
+    filler = alloc_storable_picture(p_Vid, BOTTOM_FIELD, present->size_x, 2*present->size_y, present->size_x_cr, 2*present->size_y_cr);
+    fs->bottom_field = filler;
+    filler->chroma_format_idc = present->chroma_format_idc;
+    clear_picture(p_Vid, filler);
+    dpb_combine_field_yuv(p_Vid, fs);
+    write_picture (p_Vid, fs->frame, TOP_FIELD);
+  }
+
+  if (fs->is_used & 0x02)
+  {
+    // only a bottom field is stored: pair it with a cleared top field
+    present = fs->bottom_field;
+    filler = alloc_storable_picture(p_Vid, TOP_FIELD, present->size_x, 2*present->size_y, present->size_x_cr, 2*present->size_y_cr);
+    fs->top_field = filler;
+    filler->chroma_format_idc = present->chroma_format_idc;
+    clear_picture(p_Vid, filler);
+
+    // the new top field takes over the cropping rectangle of the bottom field
+    filler->frame_cropping_flag = present->frame_cropping_flag;
+    if (filler->frame_cropping_flag)
+    {
+      filler->frame_cropping_rect_top_offset = present->frame_cropping_rect_top_offset;
+      filler->frame_cropping_rect_bottom_offset = present->frame_cropping_rect_bottom_offset;
+      filler->frame_cropping_rect_left_offset = present->frame_cropping_rect_left_offset;
+      filler->frame_cropping_rect_right_offset = present->frame_cropping_rect_right_offset;
+    }
+    dpb_combine_field_yuv(p_Vid, fs);
+    write_picture (p_Vid, fs->frame, BOTTOM_FIELD);
+  }
+
+  fs->is_used = 3;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output whatever is held in the direct output buffer, then release
+ *    its frame and both fields and mark the buffer as empty.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ ************************************************************************
+ */
+static void flush_direct_output(VideoParameters *p_Vid)
+{
+  FrameStore *out = p_Vid->out_buffer;
+
+  write_unpaired_field(p_Vid, out);
+
+  // drop all three pictures and forget about them
+  free_storable_picture(p_Vid, out->frame);
+  out->frame = NULL;
+  free_storable_picture(p_Vid, out->top_field);
+  out->top_field = NULL;
+  free_storable_picture(p_Vid, out->bottom_field);
+  out->bottom_field = NULL;
+
+  out->is_used = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output the content of a frame store and flag it as output.
+ *    A store holding a single field goes through write_unpaired_field();
+ *    a complete frame is written unless the stream is flagged as
+ *    non-conforming and no recovery point has been seen yet.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param fs
+ *    FrameStore containing the frame
+ ************************************************************************
+ */
+void write_stored_frame( VideoParameters *p_Vid, FrameStore *fs)
+{
+  // a buffered direct-output field must go out first
+  flush_direct_output(p_Vid);
+
+  if (fs->is_used >= 3)
+  {
+    if (fs->recovery_frame)
+      p_Vid->recovery_flag = 1;
+
+    if (p_Vid->recovery_flag || !p_Vid->non_conforming_stream)
+      write_picture(p_Vid, fs->frame, FRAME);
+  }
+  else
+  {
+    write_unpaired_field(p_Vid, fs);
+  }
+
+  fs->is_output = 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Directly output a picture without storing it in the DPB.
+ *    A frame is written and released immediately. A field is parked in
+ *    p_Vid->out_buffer; once both parities are present they are combined,
+ *    written as one frame and the buffer is emptied again.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param p
+ *    Picture for output
+ ************************************************************************
+ */
+void direct_output(VideoParameters *p_Vid, StorablePicture *p)
+{
+  if (p->structure==FRAME)
+  {
+    // we have a frame (or complementary field pair)
+    // so output it directly
+    flush_direct_output(p_Vid);
+    write_picture (p_Vid, p, FRAME);
+    free_storable_picture(p_Vid, p);
+    return;
+  }
+
+  if (p->structure == TOP_FIELD)
+  {
+    // a top field is already waiting: flush it before storing the new one
+    if (p_Vid->out_buffer->is_used &1)
+      flush_direct_output(p_Vid);
+    p_Vid->out_buffer->top_field = p;
+    p_Vid->out_buffer->is_used |= 1;
+  }
+
+  if (p->structure == BOTTOM_FIELD)
+  {
+    // a bottom field is already waiting: flush it before storing the new one
+    if (p_Vid->out_buffer->is_used &2)
+      flush_direct_output(p_Vid);
+    p_Vid->out_buffer->bottom_field = p;
+    p_Vid->out_buffer->is_used |= 2;
+  }
+
+  if (p_Vid->out_buffer->is_used == 3)
+  {
+    // we have both fields, so output them
+    dpb_combine_field_yuv(p_Vid, p_Vid->out_buffer);
+    p_Vid->out_buffer->frame->time_code = p->time_code;
+    write_picture (p_Vid, p_Vid->out_buffer->frame, FRAME);
+
+    free_storable_picture(p_Vid, p_Vid->out_buffer->frame);
+    p_Vid->out_buffer->frame = NULL;
+    free_storable_picture(p_Vid, p_Vid->out_buffer->top_field);
+    p_Vid->out_buffer->top_field = NULL;
+    free_storable_picture(p_Vid, p_Vid->out_buffer->bottom_field);
+    p_Vid->out_buffer->bottom_field = NULL;
+    p_Vid->out_buffer->is_used = 0;
+  }
+}
+
diff --git a/Src/h264dec/ldecod/src/parset.c b/Src/h264dec/ldecod/src/parset.c
new file mode 100644
index 00000000..1b49cbd8
--- /dev/null
+++ b/Src/h264dec/ldecod/src/parset.c
@@ -0,0 +1,779 @@
+
+/*!
+ ************************************************************************
+ * \file
+ * parset.c
+ * \brief
+ * Parameter Sets
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ *
+ ***********************************************************************
+ */
+
+#include "global.h"
+#include "image.h"
+#include "parsetcommon.h"
+#include "parset.h"
+#include "nalu.h"
+#include "memalloc.h"
+#include "fmo.h"
+#include "cabac.h"
+#include "vlc.h"
+#include "mbuffer.h"
+#include "erc_api.h"
+
+// SYMTRACESTRING(s): with TRACE enabled, copies s into sym->tracestring, so a
+// variable named sym must be in scope wherever the macro is expanded.
+// NOTE(review): strncpy does not NUL-terminate when s is TRACESTRING_SIZE
+// characters or longer -- confirm all trace strings are shorter than that.
+// Without TRACE the macro expands to nothing.
+#if TRACE
+#define SYMTRACESTRING(s) strncpy(sym->tracestring,s,TRACESTRING_SIZE)
+#else
+#define SYMTRACESTRING(s) // do nothing
+#endif
+
+
+// declared here, defined in another translation unit
+extern void init_frext(VideoParameters *p_Vid);
+
+// Parses one scaling list from the bitstream.
+//   scalingList             receives sizeOfScalingList entries, stored at the
+//                           position given by the zig-zag table (ZZ_SCAN for
+//                           16 entries, ZZ_SCAN8 otherwise)
+//   UseDefaultScalingMatrix set to TRUE when the very first decoded scale is 0
+//   s                       bitstream the delta values are read from
+// Once a decoded scale of 0 is seen, no further deltas are read and the last
+// scale is repeated for the remaining entries.
+void Scaling_List(int *scalingList, int sizeOfScalingList, Boolean *UseDefaultScalingMatrix, Bitstream *s)
+{
+  int idx, pos;
+  int delta;
+  int prev = 8;
+  int next = 8;
+
+  for (idx = 0; idx < sizeOfScalingList; idx++)
+  {
+    if (sizeOfScalingList == 16)
+      pos = ZZ_SCAN[idx];
+    else
+      pos = ZZ_SCAN8[idx];
+
+    if (next != 0)
+    {
+      delta = se_v ( " : delta_sl " , s);
+      next = (prev + delta + 256) % 256;
+      *UseDefaultScalingMatrix = (Boolean) (pos==0 && next==0);
+    }
+
+    // a zero scale means "keep repeating the previous value"
+    if (next != 0)
+      prev = next;
+    scalingList[pos] = prev;
+  }
+}
+// fill sps with content of p
+//
+// Parses a sequence parameter set from p->bitstream into *sps.
+// sps->Valid is set to TRUE only after the whole SPS has been read and the
+// range checks below have passed. When the profile or one of the checked
+// syntax elements is out of range, a message is printed and the function
+// returns early without touching sps->Valid.
+static void InterpretSPS (VideoParameters *p_Vid, DataPartition *p, seq_parameter_set_rbsp_t *sps)
+{
+  // upper bounds of the syntax elements as given by the H.264 specification
+  enum
+  {
+    MAX_SPS_ID            = 31,
+    MAX_CHROMA_FORMAT_IDC = 3,
+    MAX_POC_CYCLE_LENGTH  = 255
+  };
+
+  unsigned i;
+  unsigned n_ScalingList;
+  Bitstream *s;
+
+  assert (p != NULL);
+  assert (p->bitstream != NULL);
+  assert (p->bitstream->streamBuffer != 0);
+  assert (sps != NULL);
+
+  // p is dereferenced only after the asserts above have checked it
+  s = p->bitstream;
+
+  sps->profile_idc = u_v (8, "SPS: profile_idc" , s);
+
+  if ((sps->profile_idc!=BASELINE ) &&
+      (sps->profile_idc!=MAIN ) &&
+      (sps->profile_idc!=EXTENDED ) &&
+      (sps->profile_idc!=FREXT_HP ) &&
+      (sps->profile_idc!=FREXT_Hi10P ) &&
+      (sps->profile_idc!=FREXT_Hi422 ) &&
+      (sps->profile_idc!=FREXT_Hi444 ) &&
+      (sps->profile_idc!=FREXT_CAVLC444 ))
+  {
+    printf("Invalid Profile IDC (%d) encountered. \n", sps->profile_idc);
+    return;
+  }
+
+  sps->constrained_set0_flag = u_1 ( "SPS: constrained_set0_flag" , s);
+  sps->constrained_set1_flag = u_1 ( "SPS: constrained_set1_flag" , s);
+  sps->constrained_set2_flag = u_1 ( "SPS: constrained_set2_flag" , s);
+  sps->constrained_set3_flag = u_1 ( "SPS: constrained_set3_flag" , s);
+
+  // Reserved bits: a decoder has to ignore their value, so they are consumed
+  // and dropped rather than asserted on (bitstream content must never be able
+  // to abort the decoder).
+  (void) u_v (4, "SPS: reserved_zero_4bits" , s);
+
+  sps->level_idc = u_v (8, "SPS: level_idc" , s);
+
+  sps->seq_parameter_set_id = ue_v ("SPS: seq_parameter_set_id" , s);
+  // the id is used as a table index by the caller, so reject anything out of range
+  if (sps->seq_parameter_set_id > (unsigned) MAX_SPS_ID)
+  {
+    printf("Invalid seq_parameter_set_id (%u) encountered. \n", (unsigned) sps->seq_parameter_set_id);
+    return;
+  }
+
+  // Fidelity Range Extensions stuff
+  sps->chroma_format_idc = 1;
+  sps->bit_depth_luma_minus8 = 0;
+  sps->bit_depth_chroma_minus8 = 0;
+  p_Vid->lossless_qpprime_flag = 0;
+  sps->separate_colour_plane_flag = 0;
+
+  if((IS_FREXT_PROFILE(sps->profile_idc)))
+  {
+    sps->chroma_format_idc = ue_v ("SPS: chroma_format_idc" , s);
+    // chroma_format_idc later indexes the 4-entry SubWidthC/SubHeightC tables
+    if (sps->chroma_format_idc > (unsigned) MAX_CHROMA_FORMAT_IDC)
+    {
+      printf("Invalid chroma_format_idc (%u) encountered. \n", (unsigned) sps->chroma_format_idc);
+      return;
+    }
+
+    if(sps->chroma_format_idc == YUV444)
+    {
+      sps->separate_colour_plane_flag = u_1 ("SPS: separate_colour_plane_flag" , s);
+    }
+
+    sps->bit_depth_luma_minus8 = ue_v ("SPS: bit_depth_luma_minus8" , s);
+    sps->bit_depth_chroma_minus8 = ue_v ("SPS: bit_depth_chroma_minus8" , s);
+    p_Vid->lossless_qpprime_flag = u_1 ("SPS: lossless_qpprime_y_zero_flag" , s);
+
+    sps->seq_scaling_matrix_present_flag = u_1 ( "SPS: seq_scaling_matrix_present_flag" , s);
+
+    if(sps->seq_scaling_matrix_present_flag)
+    {
+      // six 4x4 lists, followed by two (or six for 4:4:4) 8x8 lists
+      n_ScalingList = (sps->chroma_format_idc != YUV444) ? 8 : 12;
+      for(i=0; i<n_ScalingList; i++)
+      {
+        sps->seq_scaling_list_present_flag[i] = u_1 ( "SPS: seq_scaling_list_present_flag" , s);
+        if(sps->seq_scaling_list_present_flag[i])
+        {
+          if(i<6)
+            Scaling_List(sps->ScalingList4x4[i], 16, &sps->UseDefaultScalingMatrix4x4Flag[i], s);
+          else
+            Scaling_List(sps->ScalingList8x8[i-6], 64, &sps->UseDefaultScalingMatrix8x8Flag[i-6], s);
+        }
+      }
+    }
+  }
+
+  sps->log2_max_frame_num_minus4 = ue_v ("SPS: log2_max_frame_num_minus4" , s);
+  sps->pic_order_cnt_type = ue_v ("SPS: pic_order_cnt_type" , s);
+
+  if (sps->pic_order_cnt_type == 0)
+    sps->log2_max_pic_order_cnt_lsb_minus4 = ue_v ("SPS: log2_max_pic_order_cnt_lsb_minus4" , s);
+  else if (sps->pic_order_cnt_type == 1)
+  {
+    sps->delta_pic_order_always_zero_flag = u_1 ("SPS: delta_pic_order_always_zero_flag" , s);
+    sps->offset_for_non_ref_pic = se_v ("SPS: offset_for_non_ref_pic" , s);
+    sps->offset_for_top_to_bottom_field = se_v ("SPS: offset_for_top_to_bottom_field" , s);
+    sps->num_ref_frames_in_pic_order_cnt_cycle = ue_v ("SPS: num_ref_frames_in_pic_order_cnt_cycle" , s);
+    // the count bounds the writes into offset_for_ref_frame[] below
+    if (sps->num_ref_frames_in_pic_order_cnt_cycle > (unsigned) MAX_POC_CYCLE_LENGTH)
+    {
+      printf("Invalid num_ref_frames_in_pic_order_cnt_cycle (%u) encountered. \n", (unsigned) sps->num_ref_frames_in_pic_order_cnt_cycle);
+      return;
+    }
+    for(i=0; i<sps->num_ref_frames_in_pic_order_cnt_cycle; i++)
+      sps->offset_for_ref_frame[i] = se_v ("SPS: offset_for_ref_frame[i]" , s);
+  }
+  sps->num_ref_frames = ue_v ("SPS: num_ref_frames" , s);
+  sps->gaps_in_frame_num_value_allowed_flag = u_1 ("SPS: gaps_in_frame_num_value_allowed_flag" , s);
+  sps->pic_width_in_mbs_minus1 = ue_v ("SPS: pic_width_in_mbs_minus1" , s);
+  sps->pic_height_in_map_units_minus1 = ue_v ("SPS: pic_height_in_map_units_minus1" , s);
+  sps->frame_mbs_only_flag = u_1 ("SPS: frame_mbs_only_flag" , s);
+  if (!sps->frame_mbs_only_flag)
+  {
+    sps->mb_adaptive_frame_field_flag = u_1 ("SPS: mb_adaptive_frame_field_flag" , s);
+  }
+  sps->direct_8x8_inference_flag = u_1 ("SPS: direct_8x8_inference_flag" , s);
+  sps->frame_cropping_flag = u_1 ("SPS: frame_cropping_flag" , s);
+
+  if (sps->frame_cropping_flag)
+  {
+    sps->frame_cropping_rect_left_offset = ue_v ("SPS: frame_cropping_rect_left_offset" , s);
+    sps->frame_cropping_rect_right_offset = ue_v ("SPS: frame_cropping_rect_right_offset" , s);
+    sps->frame_cropping_rect_top_offset = ue_v ("SPS: frame_cropping_rect_top_offset" , s);
+    sps->frame_cropping_rect_bottom_offset = ue_v ("SPS: frame_cropping_rect_bottom_offset" , s);
+  }
+  sps->vui_parameters_present_flag = (Boolean) u_1 ("SPS: vui_parameters_present_flag" , s);
+
+  // defaults first, then whatever the bitstream overrides
+  InitVUI(sps);
+  ReadVUI(p, sps);
+
+  sps->Valid = TRUE;
+}
+
+
+// Sets the default for vui_seq_parameters.matrix_coefficients. InterpretSPS
+// calls this right before ReadVUI, which overwrites the value only when the
+// bitstream carries a colour description.
+void InitVUI(seq_parameter_set_rbsp_t *sps)
+{
+ // NOTE(review): 2 is presumably the "unspecified" matrix_coefficients code of the H.264 VUI -- confirm against the spec
+ sps->vui_seq_parameters.matrix_coefficients = 2;
+}
+
+
+// Reads the VUI parameters of an SPS from p->bitstream into
+// sps->vui_seq_parameters. Nothing is read when
+// sps->vui_parameters_present_flag is not set. Optional groups are parsed
+// only when their presence flag is set; fields of absent groups are left
+// untouched. Always returns 0.
+int ReadVUI(DataPartition *p, seq_parameter_set_rbsp_t *sps)
+{
+  Bitstream *s = p->bitstream;
+
+  if (!sps->vui_parameters_present_flag)
+    return 0;
+
+  // aspect ratio
+  sps->vui_seq_parameters.aspect_ratio_info_present_flag = u_1 ("VUI: aspect_ratio_info_present_flag" , s);
+  if (sps->vui_seq_parameters.aspect_ratio_info_present_flag)
+  {
+    sps->vui_seq_parameters.aspect_ratio_idc = u_v ( 8, "VUI: aspect_ratio_idc" , s);
+    // idc 255 is followed by an explicit width/height pair
+    if (sps->vui_seq_parameters.aspect_ratio_idc == 255)
+    {
+      sps->vui_seq_parameters.sar_width = (unsigned short) u_v (16, "VUI: sar_width" , s);
+      sps->vui_seq_parameters.sar_height = (unsigned short) u_v (16, "VUI: sar_height" , s);
+    }
+  }
+
+  // overscan
+  sps->vui_seq_parameters.overscan_info_present_flag = u_1 ("VUI: overscan_info_present_flag" , s);
+  if (sps->vui_seq_parameters.overscan_info_present_flag)
+    sps->vui_seq_parameters.overscan_appropriate_flag = u_1 ("VUI: overscan_appropriate_flag" , s);
+
+  // video signal type and colour description
+  sps->vui_seq_parameters.video_signal_type_present_flag = u_1 ("VUI: video_signal_type_present_flag" , s);
+  if (sps->vui_seq_parameters.video_signal_type_present_flag)
+  {
+    sps->vui_seq_parameters.video_format = u_v ( 3,"VUI: video_format" , s);
+    sps->vui_seq_parameters.video_full_range_flag = u_1 ( "VUI: video_full_range_flag" , s);
+    sps->vui_seq_parameters.colour_description_present_flag = u_1 ( "VUI: color_description_present_flag" , s);
+    if (sps->vui_seq_parameters.colour_description_present_flag)
+    {
+      sps->vui_seq_parameters.colour_primaries = u_v ( 8,"VUI: colour_primaries" , s);
+      sps->vui_seq_parameters.transfer_characteristics = u_v ( 8,"VUI: transfer_characteristics" , s);
+      sps->vui_seq_parameters.matrix_coefficients = u_v ( 8,"VUI: matrix_coefficients" , s);
+    }
+  }
+
+  // chroma sample location
+  sps->vui_seq_parameters.chroma_location_info_present_flag = u_1 ( "VUI: chroma_loc_info_present_flag" , s);
+  if (sps->vui_seq_parameters.chroma_location_info_present_flag)
+  {
+    sps->vui_seq_parameters.chroma_sample_loc_type_top_field = ue_v ( "VUI: chroma_sample_loc_type_top_field" , s);
+    sps->vui_seq_parameters.chroma_sample_loc_type_bottom_field = ue_v ( "VUI: chroma_sample_loc_type_bottom_field" , s);
+  }
+
+  // timing
+  sps->vui_seq_parameters.timing_info_present_flag = u_1 ("VUI: timing_info_present_flag" , s);
+  if (sps->vui_seq_parameters.timing_info_present_flag)
+  {
+    sps->vui_seq_parameters.num_units_in_tick = u_v (32,"VUI: num_units_in_tick" , s);
+    sps->vui_seq_parameters.time_scale = u_v (32,"VUI: time_scale" , s);
+    sps->vui_seq_parameters.fixed_frame_rate_flag = u_1 ( "VUI: fixed_frame_rate_flag" , s);
+  }
+
+  // NAL and VCL HRD parameters
+  sps->vui_seq_parameters.nal_hrd_parameters_present_flag = u_1 ("VUI: nal_hrd_parameters_present_flag" , s);
+  if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag)
+    ReadHRDParameters(p, &(sps->vui_seq_parameters.nal_hrd_parameters));
+
+  sps->vui_seq_parameters.vcl_hrd_parameters_present_flag = u_1 ("VUI: vcl_hrd_parameters_present_flag" , s);
+  if (sps->vui_seq_parameters.vcl_hrd_parameters_present_flag)
+    ReadHRDParameters(p, &(sps->vui_seq_parameters.vcl_hrd_parameters));
+
+  // low_delay_hrd_flag is present as soon as either HRD block was sent
+  if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag || sps->vui_seq_parameters.vcl_hrd_parameters_present_flag)
+    sps->vui_seq_parameters.low_delay_hrd_flag = u_1 ("VUI: low_delay_hrd_flag" , s);
+
+  sps->vui_seq_parameters.pic_struct_present_flag = u_1 ("VUI: pic_struct_present_flag " , s);
+
+  // bitstream restrictions
+  sps->vui_seq_parameters.bitstream_restriction_flag = u_1 ("VUI: bitstream_restriction_flag" , s);
+  if (sps->vui_seq_parameters.bitstream_restriction_flag)
+  {
+    sps->vui_seq_parameters.motion_vectors_over_pic_boundaries_flag = u_1 ("VUI: motion_vectors_over_pic_boundaries_flag", s);
+    sps->vui_seq_parameters.max_bytes_per_pic_denom = ue_v ("VUI: max_bytes_per_pic_denom" , s);
+    sps->vui_seq_parameters.max_bits_per_mb_denom = ue_v ("VUI: max_bits_per_mb_denom" , s);
+    sps->vui_seq_parameters.log2_max_mv_length_horizontal = ue_v ("VUI: log2_max_mv_length_horizontal" , s);
+    sps->vui_seq_parameters.log2_max_mv_length_vertical = ue_v ("VUI: log2_max_mv_length_vertical" , s);
+    sps->vui_seq_parameters.num_reorder_frames = ue_v ("VUI: num_reorder_frames" , s);
+    sps->vui_seq_parameters.max_dec_frame_buffering = ue_v ("VUI: max_dec_frame_buffering" , s);
+  }
+
+  return 0;
+}
+
+
+// Reads one set of HRD parameters from p->bitstream into *hrd.
+//
+// Returns 0 on success. Returns -1, after printing a message, when
+// cpb_cnt_minus1 exceeds the range the H.264 specification allows (0..31);
+// in that case none of the per-CPB arrays is written.
+// NOTE(review): the per-CPB arrays are assumed to hold at least 32 entries --
+// confirm against the hrd_parameters_t declaration.
+int ReadHRDParameters(DataPartition *p, hrd_parameters_t *hrd)
+{
+  enum { MAX_CPB_CNT_MINUS1 = 31 };
+
+  Bitstream *s = p->bitstream;
+  unsigned int SchedSelIdx;
+
+  hrd->cpb_cnt_minus1 = ue_v ( "VUI: cpb_cnt_minus1" , s);
+  // the count comes straight from the bitstream and bounds the array writes below
+  if (hrd->cpb_cnt_minus1 > (unsigned) MAX_CPB_CNT_MINUS1)
+  {
+    printf("Invalid cpb_cnt_minus1 (%u) encountered. \n", (unsigned) hrd->cpb_cnt_minus1);
+    return -1;
+  }
+
+  hrd->bit_rate_scale = u_v ( 4,"VUI: bit_rate_scale" , s);
+  hrd->cpb_size_scale = u_v ( 4,"VUI: cpb_size_scale" , s);
+
+  for( SchedSelIdx = 0; SchedSelIdx <= hrd->cpb_cnt_minus1; SchedSelIdx++ )
+  {
+    hrd->bit_rate_value_minus1[ SchedSelIdx ] = ue_v ( "VUI: bit_rate_value_minus1" , s);
+    hrd->cpb_size_value_minus1[ SchedSelIdx ] = ue_v ( "VUI: cpb_size_value_minus1" , s);
+    hrd->cbr_flag[ SchedSelIdx ] = u_1 ( "VUI: cbr_flag" , s);
+  }
+
+  hrd->initial_cpb_removal_delay_length_minus1 = u_v ( 5,"VUI: initial_cpb_removal_delay_length_minus1" , s);
+  hrd->cpb_removal_delay_length_minus1 = u_v ( 5,"VUI: cpb_removal_delay_length_minus1" , s);
+  hrd->dpb_output_delay_length_minus1 = u_v ( 5,"VUI: dpb_output_delay_length_minus1" , s);
+  hrd->time_offset_length = u_v ( 5,"VUI: time_offset_length" , s);
+
+  return 0;
+}
+
+
+// Parses a picture parameter set from p->bitstream into *pps.
+//
+// pps->Valid is set to TRUE once the whole PPS has been read. When
+// seq_parameter_set_id or num_slice_groups_minus1 is outside the range the
+// H.264 specification allows, a message is printed, pps->Valid is set to
+// FALSE and the function returns early, before the value can be used as an
+// array bound or index.
+// For slice group map type 6 a slice_group_id array is allocated here and
+// stored in the PPS.
+static void InterpretPPS (VideoParameters *p_Vid, DataPartition *p, pic_parameter_set_rbsp_t *pps)
+{
+  // upper bounds of the syntax elements as given by the H.264 specification
+  enum
+  {
+    MAX_REFERENCED_SPS_ID       = 31,
+    MAX_NUM_SLICE_GROUPS_MINUS1 = 7
+  };
+
+  unsigned i;
+  unsigned n_ScalingList;
+  int chroma_format_idc;
+  int NumberBitsPerSliceGroupId;
+  Bitstream *s;
+
+  assert (p != NULL);
+  assert (p->bitstream != NULL);
+  assert (p->bitstream->streamBuffer != 0);
+  assert (pps != NULL);
+
+  // p is dereferenced only after the asserts above have checked it
+  s = p->bitstream;
+
+  pps->pic_parameter_set_id = ue_v ("PPS: pic_parameter_set_id" , s);
+  pps->seq_parameter_set_id = ue_v ("PPS: seq_parameter_set_id" , s);
+  // used further down as an index into p_Vid->SeqParSet[]
+  if (pps->seq_parameter_set_id > (unsigned) MAX_REFERENCED_SPS_ID)
+  {
+    printf("Invalid seq_parameter_set_id (%u) encountered in PPS. \n", (unsigned) pps->seq_parameter_set_id);
+    pps->Valid = FALSE;
+    return;
+  }
+  pps->entropy_coding_mode_flag = u_1 ("PPS: entropy_coding_mode_flag" , s);
+
+  //! Note: as per JVT-F078 the following bit is unconditional. If F078 is not accepted, then
+  //! one has to fetch the correct SPS to check whether the bit is present (hopefully there is
+  //! no consistency problem :-(
+  //! The current encoder code handles this in the same way. When you change this, don't forget
+  //! the encoder! StW, 12/8/02
+  pps->bottom_field_pic_order_in_frame_present_flag = u_1 ("PPS: bottom_field_pic_order_in_frame_present_flag" , s);
+
+  pps->num_slice_groups_minus1 = ue_v ("PPS: num_slice_groups_minus1" , s);
+  // bounds the writes into run_length_minus1[], top_left[] and bottom_right[]
+  if (pps->num_slice_groups_minus1 > (unsigned) MAX_NUM_SLICE_GROUPS_MINUS1)
+  {
+    printf("Invalid num_slice_groups_minus1 (%u) encountered. \n", (unsigned) pps->num_slice_groups_minus1);
+    pps->Valid = FALSE;
+    return;
+  }
+
+  // FMO stuff begins here
+  if (pps->num_slice_groups_minus1 > 0)
+  {
+    pps->slice_group_map_type = ue_v ("PPS: slice_group_map_type" , s);
+    if (pps->slice_group_map_type == 0)
+    {
+      for (i=0; i<=pps->num_slice_groups_minus1; i++)
+        pps->run_length_minus1 [i] = ue_v ("PPS: run_length_minus1 [i]" , s);
+    }
+    else if (pps->slice_group_map_type == 2)
+    {
+      // one rectangle per slice group except the last one
+      for (i=0; i<pps->num_slice_groups_minus1; i++)
+      {
+        //! JVT-F078: avoid reference of SPS by using ue(v) instead of u(v)
+        pps->top_left [i] = ue_v ("PPS: top_left [i]" , s);
+        pps->bottom_right [i] = ue_v ("PPS: bottom_right [i]" , s);
+      }
+    }
+    else if (pps->slice_group_map_type == 3 ||
+             pps->slice_group_map_type == 4 ||
+             pps->slice_group_map_type == 5)
+    {
+      pps->slice_group_change_direction_flag = u_1 ("PPS: slice_group_change_direction_flag" , s);
+      pps->slice_group_change_rate_minus1 = ue_v ("PPS: slice_group_change_rate_minus1" , s);
+    }
+    else if (pps->slice_group_map_type == 6)
+    {
+      // each id is coded with just enough bits for the number of slice groups
+      if (pps->num_slice_groups_minus1+1 >4)
+        NumberBitsPerSliceGroupId = 3;
+      else if (pps->num_slice_groups_minus1+1 > 2)
+        NumberBitsPerSliceGroupId = 2;
+      else
+        NumberBitsPerSliceGroupId = 1;
+      pps->pic_size_in_map_units_minus1 = ue_v ("PPS: pic_size_in_map_units_minus1" , s);
+      if ((pps->slice_group_id = calloc (pps->pic_size_in_map_units_minus1+1, 1)) == NULL)
+        no_mem_exit ("InterpretPPS: slice_group_id");
+      for (i=0; i<=pps->pic_size_in_map_units_minus1; i++)
+        pps->slice_group_id[i] = (byte) u_v (NumberBitsPerSliceGroupId, "slice_group_id[i]", s);
+    }
+  }
+
+  // End of FMO stuff
+
+  pps->num_ref_idx_l0_active_minus1 = ue_v ("PPS: num_ref_idx_l0_active_minus1" , s);
+  pps->num_ref_idx_l1_active_minus1 = ue_v ("PPS: num_ref_idx_l1_active_minus1" , s);
+  pps->weighted_pred_flag = u_1 ("PPS: weighted_pred_flag" , s);
+  pps->weighted_bipred_idc = u_v ( 2, "PPS: weighted_bipred_idc" , s);
+  pps->pic_init_qp_minus26 = se_v ("PPS: pic_init_qp_minus26" , s);
+  pps->pic_init_qs_minus26 = se_v ("PPS: pic_init_qs_minus26" , s);
+
+  pps->chroma_qp_index_offset = se_v ("PPS: chroma_qp_index_offset" , s);
+
+  pps->deblocking_filter_control_present_flag = u_1 ("PPS: deblocking_filter_control_present_flag" , s);
+  pps->constrained_intra_pred_flag = u_1 ("PPS: constrained_intra_pred_flag" , s);
+  pps->redundant_pic_cnt_present_flag = u_1 ("PPS: redundant_pic_cnt_present_flag" , s);
+
+  if(more_rbsp_data(s->streamBuffer, s->frame_bitoffset,s->bitstream_length)) // more_data_in_rbsp()
+  {
+    //Fidelity Range Extensions Stuff
+    pps->transform_8x8_mode_flag = u_1 ("PPS: transform_8x8_mode_flag" , s);
+    pps->pic_scaling_matrix_present_flag = u_1 ("PPS: pic_scaling_matrix_present_flag" , s);
+
+    if(pps->pic_scaling_matrix_present_flag)
+    {
+      chroma_format_idc = p_Vid->SeqParSet[pps->seq_parameter_set_id].chroma_format_idc;
+      // six 4x4 lists, plus two (or six for 4:4:4) 8x8 lists when the 8x8 transform is enabled
+      n_ScalingList = 6 + ((chroma_format_idc != YUV444) ? 2 : 6) * pps->transform_8x8_mode_flag;
+      for(i=0; i<n_ScalingList; i++)
+      {
+        pps->pic_scaling_list_present_flag[i]= u_1 ("PPS: pic_scaling_list_present_flag" , s);
+
+        if(pps->pic_scaling_list_present_flag[i])
+        {
+          if(i<6)
+            Scaling_List(pps->ScalingList4x4[i], 16, &pps->UseDefaultScalingMatrix4x4Flag[i], s);
+          else
+            Scaling_List(pps->ScalingList8x8[i-6], 64, &pps->UseDefaultScalingMatrix8x8Flag[i-6], s);
+        }
+      }
+    }
+    pps->second_chroma_qp_index_offset = se_v ("PPS: second_chroma_qp_index_offset" , s);
+  }
+  else
+  {
+    // without the extension data the second offset mirrors the first
+    pps->second_chroma_qp_index_offset = pps->chroma_qp_index_offset;
+  }
+
+  pps->Valid = TRUE;
+}
+
+
+// Placeholder: performs no check yet, only prints a reminder on stdout.
+void PPSConsistencyCheck (pic_parameter_set_rbsp_t *pps)
+{
+  // TODO: e.g. react to a pps->seq_parameter_set_id that is invalid
+  fputs ("Consistency checking a picture parset, to be implemented\n", stdout);
+}
+
+// Placeholder: performs no check yet, only prints a reminder on stdout.
+void SPSConsistencyCheck (seq_parameter_set_rbsp_t *sps)
+{
+  fputs ("Consistency checking a sequence parset, to be implemented\n", stdout);
+}
+
+// Stores a parsed PPS in slot id of p_Vid->PicParSet[].
+//
+// The slot takes over ownership of pps->slice_group_id (the pointer in *pps
+// is cleared); a slice_group_id array previously held by a valid slot is
+// freed first. An id outside 0..MAXPPS-1 is rejected with a message and
+// nothing is stored; *pps then keeps its slice_group_id.
+void MakePPSavailable (VideoParameters *p_Vid, int id, pic_parameter_set_rbsp_t *pps)
+{
+  pic_parameter_set_rbsp_t *slot;
+
+  assert (pps->Valid == TRUE);
+
+  // id comes from the bitstream; never index the table with it unchecked
+  if (id < 0 || id >= MAXPPS)
+  {
+    printf("Invalid pic_parameter_set_id (%d) encountered. \n", id);
+    return;
+  }
+
+  slot = &p_Vid->PicParSet[id];
+
+  if (slot->Valid == TRUE && slot->slice_group_id != NULL)
+    free (slot->slice_group_id);
+
+  memcpy (slot, pps, sizeof (pic_parameter_set_rbsp_t));
+
+  // we can simply use the memory provided with the pps. the PPS is destroyed after this function
+  // call and will not try to free if pps->slice_group_id == NULL
+  slot->slice_group_id = pps->slice_group_id;
+  pps->slice_group_id = NULL;
+}
+
+// Invalidates all MAXPPS entries of p_Vid->PicParSet[] and frees the
+// slice_group_id array of every entry that was valid. The freed pointer is
+// reset to NULL so the table never holds a dangling pointer.
+void CleanUpPPS(VideoParameters *p_Vid)
+{
+  int i;
+
+  for (i=0; i<MAXPPS; i++)
+  {
+    // only valid entries are trusted to carry a meaningful pointer
+    if (p_Vid->PicParSet[i].Valid == TRUE && p_Vid->PicParSet[i].slice_group_id != NULL)
+    {
+      free (p_Vid->PicParSet[i].slice_group_id);
+      p_Vid->PicParSet[i].slice_group_id = NULL;
+    }
+
+    p_Vid->PicParSet[i].Valid = FALSE;
+  }
+}
+
+
+// Copies a parsed SPS into slot id of p_Vid->SeqParSet[].
+// The SPS must have been marked valid by the parser.
+void MakeSPSavailable (VideoParameters *p_Vid, int id, seq_parameter_set_rbsp_t *sps)
+{
+  seq_parameter_set_rbsp_t *slot = &p_Vid->SeqParSet[id];
+
+  assert (sps->Valid == TRUE);
+  memcpy (slot, sps, sizeof *sps);
+}
+
+// Empty stub: currently does nothing with p_Vid, buffer or bufferlen.
+// NOTE(review): presumably meant to process an SPS supplied as a raw memory
+// buffer instead of a NALU_t -- confirm the intent before relying on it.
+void ProcessSPS_Memory(VideoParameters *p_Vid, const void *buffer, size_t bufferlen)
+{
+
+}
+
+// Parses the SPS carried by nalu and, when it is valid, stores it in the
+// SPS table and updates profile_idc, separate_colour_plane_flag and
+// ChromaArrayType in p_Vid.
+// The bitstream reads directly from the NAL unit buffer starting at
+// nalu->buf[1]; nothing is copied.
+// When the new SPS has the id of the active SPS but different content, the
+// picture being decoded (if any) is closed and the active SPS is dropped.
+void ProcessSPS(VideoParameters *p_Vid, NALU_t *nalu)
+{
+  DataPartition *dp = AllocPartition(1);
+  seq_parameter_set_rbsp_t *sps = AllocSPS();
+  Bitstream *bs = dp->bitstream;
+
+  // parse in place, skipping the first byte of the NAL unit
+  bs->streamBuffer = &nalu->buf[1];
+  bs->bitstream_length = RBSPtoSODB (bs->streamBuffer, nalu->len-1);
+  bs->code_len = bs->bitstream_length;
+  bs->frame_bitoffset = 0;
+  bs->read_len = bs->frame_bitoffset;
+
+  InterpretSPS (p_Vid, dp, sps);
+
+  if (sps->Valid)
+  {
+    if (p_Vid->active_sps
+        && sps->seq_parameter_set_id == p_Vid->active_sps->seq_parameter_set_id
+        && !sps_is_equal(sps, p_Vid->active_sps))
+    {
+      // this may only happen on slice loss
+      if (p_Vid->dec_picture)
+        exit_picture(p_Vid, &p_Vid->dec_picture);
+
+      p_Vid->active_sps = NULL;
+    }
+
+    MakeSPSavailable (p_Vid, sps->seq_parameter_set_id, sps);
+    p_Vid->profile_idc = sps->profile_idc;
+    p_Vid->separate_colour_plane_flag = sps->separate_colour_plane_flag;
+
+    // with separate colour planes the chroma array type is forced to 0
+    p_Vid->ChromaArrayType = p_Vid->separate_colour_plane_flag ? 0 : sps->chroma_format_idc;
+  }
+
+  FreePartition (dp, 1);
+  FreeSPS (sps);
+}
+
+// Parses the PPS carried by nalu and stores it in the PPS table.
+// The bitstream reads directly from the NAL unit buffer starting at
+// nalu->buf[1]; nothing is copied.
+// A PPS that the parser did not mark as valid is discarded, in the same way
+// ProcessSPS treats an invalid SPS.
+// When the new PPS has the id of the active PPS but different content, the
+// picture being decoded (if any) is closed and the active PPS is dropped.
+void ProcessPPS (VideoParameters *p_Vid, NALU_t *nalu)
+{
+  DataPartition *dp = AllocPartition(1);
+  pic_parameter_set_rbsp_t *pps = AllocPPS();
+
+  dp->bitstream->streamBuffer = &nalu->buf[1];
+  dp->bitstream->code_len = dp->bitstream->bitstream_length = RBSPtoSODB (dp->bitstream->streamBuffer, nalu->len-1);
+  dp->bitstream->read_len = dp->bitstream->frame_bitoffset = 0;
+  InterpretPPS (p_Vid, dp, pps);
+
+  // never store (or compare against) a parameter set the parser rejected
+  if (pps->Valid)
+  {
+    if (p_Vid->active_pps)
+    {
+      if (pps->pic_parameter_set_id == p_Vid->active_pps->pic_parameter_set_id)
+      {
+        if (!pps_is_equal(pps, p_Vid->active_pps))
+        {
+          if (p_Vid->dec_picture)
+          {
+            // this may only happen on slice loss
+            exit_picture(p_Vid, &p_Vid->dec_picture);
+          }
+          p_Vid->active_pps = NULL;
+        }
+      }
+    }
+    MakePPSavailable (p_Vid, pps->pic_parameter_set_id, pps);
+  }
+
+  FreePartition (dp, 1);
+  FreePPS (pps);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Recompute, for each of the three components, the largest sample
+ *    value representable with its bit depth and the square of that value
+ *
+ ************************************************************************
+ */
+static void updateMaxValue(FrameFormat *format)
+{
+  int comp;
+
+  for (comp = 0; comp < 3; comp++)
+  {
+    format->max_value[comp] = (1 << format->bit_depth[comp]) - 1;
+    format->max_value_sq[comp] = format->max_value[comp] * format->max_value[comp];
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reset format information
+ *
+ *    Rewrites \a output from the picture size, chroma size and bit
+ *    depths currently stored in \a p_Vid, and from the cropping and
+ *    chroma_format_idc fields of \a sps; finishes by recomputing the
+ *    per-component maximum sample values.
+ *
+ * \param sps     sequence parameter set (cropping rectangle, chroma format)
+ * \param p_Vid   source of width/height, width_cr/height_cr and bit depths
+ * \param output  frame format description to overwrite
+ ************************************************************************
+ */
+static void reset_format_info(seq_parameter_set_rbsp_t *sps, VideoParameters *p_Vid, FrameFormat *output)
+{
+  // scale factors applied to the cropping offsets, indexed by chroma_format_idc
+  static const int SubWidthC [4]= { 1, 2, 2, 1};
+  static const int SubHeightC [4]= { 1, 2, 1, 1};
+
+  // no cropping unless the SPS asks for it
+  int crop_left = 0, crop_right = 0;
+  int crop_top = 0, crop_bottom = 0;
+
+  // cropping for luma
+  if (sps->frame_cropping_flag)
+  {
+    crop_left   = SubWidthC [sps->chroma_format_idc] * sps->frame_cropping_rect_left_offset;
+    crop_right  = SubWidthC [sps->chroma_format_idc] * sps->frame_cropping_rect_right_offset;
+    // vertical offsets are doubled when frame_mbs_only_flag is 0
+    crop_top    = SubHeightC[sps->chroma_format_idc] * ( 2 - sps->frame_mbs_only_flag ) * sps->frame_cropping_rect_top_offset;
+    crop_bottom = SubHeightC[sps->chroma_format_idc] * ( 2 - sps->frame_mbs_only_flag ) * sps->frame_cropping_rect_bottom_offset;
+  }
+
+  output->width_crop = p_Vid->width - crop_left - crop_right;
+  output->height_crop = p_Vid->height - crop_top - crop_bottom;
+
+  output->width = p_Vid->width;
+  output->height = p_Vid->height;
+
+  output->width_cr = p_Vid->width_cr;
+  output->height_cr = p_Vid->height_cr;
+
+  // output size (excluding padding)
+  output->size_cmp[0] = output->width * output->height;
+  output->size_cmp[1] = output->width_cr * output->height_cr;
+  output->size_cmp[2] = output->size_cmp[1];
+  output->size = output->size_cmp[0] + output->size_cmp[1] + output->size_cmp[2];
+  output->mb_width = output->width / MB_BLOCK_SIZE;
+  output->mb_height = output->height / MB_BLOCK_SIZE;
+
+  // both chroma components share one bit depth
+  output->bit_depth[0] = p_Vid->bitdepth_luma;
+  output->bit_depth[1] = p_Vid->bitdepth_chroma;
+  output->bit_depth[2] = p_Vid->bitdepth_chroma;
+
+  output->yuv_format = (ColorFormat) sps->chroma_format_idc;
+
+  updateMaxValue(output);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Activate Sequence Parameter Sets
+ *
+ *    When \a sps is not already the active set: closes any picture in
+ *    progress, makes \a sps active, derives the motion-vector range,
+ *    bit depths, frame-number limit and picture dimensions from it, and
+ *    then calls init_frext, init_global_buffers, (optionally) flush_dpb,
+ *    init_dpb, ercInit and the cache dimension setters in that order.
+ *    The output format description is refreshed on every call.
+ *
+ * \param p_Vid  decoder state to update
+ * \param sps    sequence parameter set to activate
+ ************************************************************************
+ */
+void activate_sps (VideoParameters *p_Vid, seq_parameter_set_rbsp_t *sps)
+{
+  InputParameters *p_Inp = p_Vid->p_Inp;
+  const int sps_changed = (p_Vid->active_sps != sps);
+
+  if (sps_changed)
+  {
+    if (p_Vid->dec_picture)
+    {
+      // this may only happen on slice loss
+      exit_picture(p_Vid, &p_Vid->dec_picture);
+    }
+    p_Vid->active_sps = sps;
+
+    // maximum vertical motion vector range in luma quarter pixel units
+    if (sps->level_idc > 30)
+      p_Vid->max_vmv_r = 512 * 4; // 512 pixels in quarter pixels
+    else if (sps->level_idc > 20)
+      p_Vid->max_vmv_r = 256 * 4;
+    else if (sps->level_idc > 10)
+      p_Vid->max_vmv_r = 128 * 4;
+    else
+      p_Vid->max_vmv_r = 64 * 4;
+
+    // Fidelity Range Extensions stuff (part 1)
+    p_Vid->bitdepth_luma = (short) (sps->bit_depth_luma_minus8 + 8);
+    p_Vid->bitdepth_scale[0] = 1 << sps->bit_depth_luma_minus8;
+    if (sps->chroma_format_idc == YUV400)
+    {
+      // no chroma: depth is zeroed, bitdepth_scale[1] is left as it was
+      p_Vid->bitdepth_chroma = 0;
+    }
+    else
+    {
+      p_Vid->bitdepth_chroma = (short) (sps->bit_depth_chroma_minus8 + 8);
+      p_Vid->bitdepth_scale[1] = 1 << sps->bit_depth_chroma_minus8;
+    }
+
+    p_Vid->MaxFrameNum = 1<<(sps->log2_max_frame_num_minus4+4);
+    p_Vid->PicWidthInMbs = (sps->pic_width_in_mbs_minus1 +1);
+    p_Vid->PicHeightInMapUnits = (sps->pic_height_in_map_units_minus1 +1);
+    p_Vid->FrameHeightInMbs = ( 2 - sps->frame_mbs_only_flag ) * p_Vid->PicHeightInMapUnits;
+    p_Vid->FrameSizeInMbs = p_Vid->PicWidthInMbs * p_Vid->FrameHeightInMbs;
+
+    p_Vid->yuv_format=sps->chroma_format_idc;
+
+    // luma picture size in samples
+    p_Vid->width = p_Vid->PicWidthInMbs * MB_BLOCK_SIZE;
+    p_Vid->height = p_Vid->FrameHeightInMbs * MB_BLOCK_SIZE;
+
+    // chroma picture size follows the chroma format; zero for any other value
+    switch (sps->chroma_format_idc)
+    {
+    case YUV420:
+      p_Vid->width_cr = (p_Vid->width >> 1);
+      p_Vid->height_cr = (p_Vid->height >> 1);
+      break;
+    case YUV422:
+      p_Vid->width_cr = (p_Vid->width >> 1);
+      p_Vid->height_cr = p_Vid->height;
+      break;
+    case YUV444:
+      p_Vid->width_cr = p_Vid->width;
+      p_Vid->height_cr = p_Vid->height;
+      break;
+    default:
+      p_Vid->width_cr = 0;
+      p_Vid->height_cr = 0;
+      break;
+    }
+
+    init_frext(p_Vid);
+    init_global_buffers(p_Vid);
+
+    if (!p_Vid->no_output_of_prior_pics_flag)
+    {
+      flush_dpb(p_Vid);
+    }
+    init_dpb(p_Vid);
+
+    ercInit(p_Vid, p_Vid->width, p_Vid->height, 1);
+    image_cache_set_dimensions(&p_Vid->image_cache[0], p_Vid->width, p_Vid->height);
+    image_cache_set_dimensions(&p_Vid->image_cache[1], p_Vid->width_cr, p_Vid->height_cr);
+    motion_cache_set_dimensions(&p_Vid->motion_cache, p_Vid->width / BLOCK_SIZE, p_Vid->height / BLOCK_SIZE);
+  }
+
+  // done whether or not the SPS changed
+  reset_format_info(sps, p_Vid, &p_Inp->output);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Activate Picture Parameter Sets
+ *
+ *    Does nothing when \a pps is already active. Otherwise closes any
+ *    picture in progress, makes \a pps active and copies its
+ *    transform_8x8_mode_flag into p_Vid->Transform8x8Mode.
+ ************************************************************************
+ */
+void activate_pps(VideoParameters *p_Vid, pic_parameter_set_rbsp_t *pps)
+{
+  if (p_Vid->active_pps == pps)
+    return;
+
+  if (p_Vid->dec_picture)
+  {
+    // this may only happen on slice loss
+    exit_picture(p_Vid, &p_Vid->dec_picture);
+  }
+
+  p_Vid->active_pps = pps;
+
+  // Fidelity Range Extensions stuff (part 2)
+  p_Vid->Transform8x8Mode = pps->transform_8x8_mode_flag;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Select the parameter sets a slice decodes with
+ *
+ *    Looks up the PPS with id \a PicParsetId and the SPS it references,
+ *    warns (without stopping) when either is marked invalid, rejects an
+ *    out-of-range pic_order_cnt_type or an over-long POC cycle through
+ *    error(), activates both sets and picks the slice's
+ *    nal_startcode_follows routine from the PPS entropy coding mode.
+ *
+ * \param currSlice    slice being set up; its p_Vid holds the stored sets
+ * \param PicParsetId  index into p_Vid->PicParSet; not range-checked here
+ ************************************************************************
+ */
+void UseParameterSet (Slice *currSlice, int PicParsetId)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  pic_parameter_set_rbsp_t *pps = &p_Vid->PicParSet[PicParsetId];
+  seq_parameter_set_rbsp_t *sps = &p_Vid->SeqParSet[pps->seq_parameter_set_id];
+
+  // invalid sets are only reported; decoding deliberately carries on
+  if (pps->Valid != TRUE)
+  {
+    printf ("Trying to use an invalid (uninitialized) Picture Parameter Set with ID %d, expect the unexpected...\n", PicParsetId);
+  }
+  if (sps->Valid != TRUE)
+  {
+    printf ("PicParset %d references an invalid (uninitialized) Sequence Parameter Set with ID %d, expect the unexpected...\n", PicParsetId, (int) pps->seq_parameter_set_id);
+  }
+
+  // In theory, and with a well-designed software, the lookups above
+  // are everything necessary. In practice, we need to patch many values
+  // in p_Vid-> (but no more in p_Inp-> -- these have been taken care of)
+
+  // Sequence Parameter Set Stuff first
+
+  if ((int) sps->pic_order_cnt_type < 0 || sps->pic_order_cnt_type > 2)
+  {
+    printf ("invalid sps->pic_order_cnt_type = %d\n", (int) sps->pic_order_cnt_type);
+    error ("pic_order_cnt_type out of range (expected 0..2)", -1000);
+  }
+
+  if (sps->pic_order_cnt_type == 1)
+  {
+    if(sps->num_ref_frames_in_pic_order_cnt_cycle >= MAXnum_ref_frames_in_pic_order_cnt_cycle)
+    {
+      error("num_ref_frames_in_pic_order_cnt_cycle too large",-1011);
+    }
+  }
+
+  activate_sps(p_Vid, sps);
+  activate_pps(p_Vid, pps);
+
+  // currSlice->dp_mode is set by read_new_slice (NALU first byte available there)
+  if (pps->entropy_coding_mode_flag == CAVLC)
+  {
+    currSlice->nal_startcode_follows = uvlc_startcode_follows;
+  }
+  else
+  {
+    currSlice->nal_startcode_follows = cabac_startcode_follows;
+  }
+}
+
diff --git a/Src/h264dec/ldecod/src/prediction.asm b/Src/h264dec/ldecod/src/prediction.asm
new file mode 100644
index 00000000..33d3d499
--- /dev/null
+++ b/Src/h264dec/ldecod/src/prediction.asm
@@ -0,0 +1,1626 @@
+.686
+.XMM
+.model FLAT
+; Byte offsets (named member@Type) used by the code below. NOTE(review): presumably these must match the compiled C layouts -- confirm whenever those types change.
+copy_image_data_16x16_stride@OptimizedFunctions = 32 ; added to _opt to fetch the routine called at COPY_16x16
+dec_picture@VideoParameters = 698192 ; read in _iMBtrans4x4 to get the picture pointer
+p_Slice@MacroBlock = 0 ; read in _iMBtrans4x4 to get the slice pointer
+plane_images@StorablePicture = 158512 ; base of a 4-byte-per-entry table indexed by pl
+mb_rec@Slice = 1696 ; _iMBtrans4x4 adds pl*256 to this
+mb_pred@Slice = 928 ; _iMBtrans4x4 adds pl*256 to this
+cof@Slice = 2464 ; _iMBtrans4x4 adds pl*512 to this
+
+CONST SEGMENT
+align 16 ; const32 is loaded with movdqa in _itrans8x8_sse2, which needs 16-byte alignment
+const32 DW 020H, 020H, 020H, 020H, 020H, 020H, 020H, 020H ; eight 16-bit words of 32: rounding term added before the >> 6
+CONST ENDS
+
+; _weighted_bi_prediction4x4: weighted blend of a 4x4 pixel block, written back into mb_pred (rows at byte offsets 0, 16, 32, 48 in both buffers).
+; weight_denom > 0 : mb_pred = pack_u8(((block_l0*wp_scale_l0 + mb_pred*wp_scale_l1 + (1 << (weight_denom-1))) >> weight_denom) + wp_offset)
+; weight_denom <= 0: mb_pred = pack_u8(((block_l0*wp_scale_l0 + mb_pred*wp_scale_l1) << -weight_denom) + wp_offset)
+; All arithmetic is on 16-bit lanes; arguments are read from the stack and left there (ret 0). Uses mm0-mm7 and does not execute emms.
+
+PUBLIC _weighted_bi_prediction4x4
+_TEXT SEGMENT
+mb_pred = 4
+block_l0 = 8
+wp_scale_l0 = 12
+wp_scale_l1 = 16
+wp_offset = 20
+weight_denom = 24
+_weighted_bi_prediction4x4 PROC ; COMDAT
+ mov eax, DWORD PTR weight_denom[esp]
+ pxor mm0, mm0
+ pshufw mm1, MMWORD PTR wp_scale_l0[esp], 0
+ test eax, eax
+ pshufw mm2, MMWORD PTR wp_scale_l1[esp], 0
+ pshufw mm3, MMWORD PTR wp_offset[esp], 0
+ jle BI_PRED4x4@LEFT_SHIFT
+
+ movd mm4, eax
+ lea ecx, DWORD PTR [eax-1] ; ecx = weight_denom - 1
+ mov edx, 1
+ shl edx, cl
+ movd mm5, edx
+ mov eax, mb_pred[esp]
+ mov edx, block_l0[esp]
+ pshufw mm5, mm5, 0
+ movd mm6, DWORD PTR 0[edx] ; block_l0
+ movd mm7, DWORD PTR 0[eax] ; mb_pred
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ movd mm7, DWORD PTR 16[eax] ; mb_pred
+ paddw mm6, mm5
+ psraw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 0[eax], mm6
+
+ movd mm6, DWORD PTR 16[edx] ; block_l0
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ movd mm7, DWORD PTR 32[eax] ; mb_pred
+ paddw mm6, mm5
+ psraw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 16[eax], mm6
+
+ movd mm6, DWORD PTR 32[edx] ; block_l0
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ movd mm7, DWORD PTR 48[eax] ; mb_pred
+ paddw mm6, mm5
+ psraw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 32[eax], mm6 ; third row goes back to the third row of mb_pred
+
+ movd mm6, DWORD PTR 48[edx] ; block_l0
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ paddw mm6, mm5
+ psraw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 48[eax], mm6
+ ret 0
+
+BI_PRED4x4@LEFT_SHIFT:
+ neg eax
+ movd mm4, eax
+ mov eax, mb_pred[esp]
+ mov edx, block_l0[esp]
+ movd mm6, DWORD PTR 0[edx] ; block_l0
+ movd mm7, DWORD PTR 0[eax] ; mb_pred
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ movd mm7, DWORD PTR 16[eax] ; mb_pred
+ psllw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 0[eax], mm6
+
+ movd mm6, DWORD PTR 16[edx] ; block_l0
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ movd mm7, DWORD PTR 32[eax] ; mb_pred
+ psllw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 16[eax], mm6
+
+ movd mm6, DWORD PTR 32[edx] ; block_l0
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ movd mm7, DWORD PTR 48[eax] ; mb_pred
+ psllw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 32[eax], mm6 ; third row goes back to the third row of mb_pred
+
+ movd mm6, DWORD PTR 48[edx] ; block_l0
+ punpcklbw mm6, mm0
+ punpcklbw mm7, mm0
+ pmullw mm6, mm1
+ pmullw mm7, mm2
+ paddw mm6, mm7
+ psllw mm6, mm4
+ paddw mm6, mm3
+ packuswb mm6, mm6
+ movd DWORD PTR 48[eax], mm6
+ ret 0
+_weighted_bi_prediction4x4 ENDP
+_TEXT ENDS
+
+PUBLIC _itrans4x4_mmx ; 4x4 inverse transform of 16-bit coefficients, added to an 8-bit prediction and stored as 8-bit pixels (MMX)
+_TEXT SEGMENT
+_tblock$ = 4 ; size = 4
+_mb_pred$ = 8 ; size = 4
+_mb_rec$ = 12 ; size = 4
+_pos_x$ = 16 ; size = 4
+_pos_y$ = 20 ; size = 4
+_itrans4x4_mmx PROC ; COMDAT
+; stack-argument entry: form the byte offset pos_y*16 + pos_x, apply it to mb_pred and mb_rec, then fall through into the register-argument body
+ mov edx, DWORD PTR _pos_y$[esp]
+ shl edx, 4
+ add edx, DWORD PTR _pos_x$[esp]
+ mov eax, DWORD PTR _tblock$[esp]
+ mov ecx, DWORD PTR _mb_pred$[esp]
+ add ecx, edx
+ add edx, DWORD PTR _mb_rec$[esp]
+_itrans4x4_mmx_direct PROC ; COMDAT; register entry: eax = coefficients, ecx = prediction, edx = destination (eax/ecx/edx are not modified)
+ ; load 4x4 matrix
+ movq mm0, MMWORD PTR 0[eax]
+ movq mm1, MMWORD PTR 8[eax]
+ movq mm2, MMWORD PTR 16[eax]
+ movq mm3, MMWORD PTR 24[eax]
+
+ ; rotate 4x4 matrix
+ movq mm4, mm0 ; p0 = mm4 (copy)
+ punpcklwd mm0, mm2 ; r0 = mm0
+ punpckhwd mm4, mm2 ; r2 = mm4
+ movq mm5, mm1 ; p1 = mm5 (copy)
+ punpcklwd mm1, mm3 ; r1 = mm1
+ punpckhwd mm5, mm3 ; r3 = mm5
+ movq mm6, mm0 ; r0 = mm6 (copy)
+ punpcklwd mm0, mm1 ; t0 = mm0
+ punpckhwd mm6, mm1 ; t1 = mm6
+ movq mm1, mm4 ; r2 = mm1 (copy)
+ punpcklwd mm1, mm5 ; t2 = mm1
+ punpckhwd mm4, mm5 ; t3 = mm4
+
+ movq mm2, mm0 ; mm2 = t0 (copy)
+ paddw mm0, mm1 ; mm0 = p0
+ psubw mm2, mm1 ; mm2 = p1, mm1 available
+ movq mm5, mm6 ; mm5 = t1 (copy)
+ psraw mm5, 1 ; mm5 = (t1 >> 1)
+ psubw mm5, mm4 ; mm5 = p2
+ psraw mm4, 1 ; mm4 = (t3 >> 1)
+ paddw mm6, mm4 ; mm6 = p3
+
+ movq mm3, mm0 ; mm3 = p0 (copy)
+ paddw mm0, mm6 ; mm0 = r0
+ movq mm1, mm2 ; mm1 = p1 (copy)
+ paddw mm1, mm5 ; mm1 = r1
+ psubw mm2, mm5 ; mm2 = r2, mm5 available
+ psubw mm3, mm6 ; mm3 = r3
+
+ ; rotate 4x4 matrix to set up for vertical
+ movq mm4, mm0 ; r0 = mm4 (copy)
+ punpcklwd mm0, mm2 ; p0 = mm0
+ punpckhwd mm4, mm2 ; p2 = mm4
+ movq mm5, mm1 ; r1 = mm5 (copy)
+ punpcklwd mm1, mm3 ; p1 = mm1
+ punpckhwd mm5, mm3 ; p3 = mm5
+ movq mm6, mm0 ; p0 = mm6 (copy)
+ punpcklwd mm0, mm1 ; t0 = mm0
+ punpckhwd mm6, mm1 ; t1 = mm6
+ movq mm1, mm4 ; p2 = mm1 (copy)
+ punpcklwd mm1, mm5 ; t2 = mm1
+ punpckhwd mm4, mm5 ; t3 = mm4
+
+ movq mm2, mm0 ; mm2 = t0 (copy)
+ paddw mm0, mm1 ; mm0 = p0
+ psubw mm2, mm1 ; mm2 = p1, mm1 available
+ movq mm5, mm6 ; mm5 = t1 (copy)
+ psraw mm5, 1 ; mm5 = (t1 >> 1)
+ psubw mm5, mm4 ; mm5 = p2
+ psraw mm4, 1 ; mm4 = (t3 >> 1)
+ paddw mm6, mm4 ; mm6 = p3
+ movq mm3, mm0 ; mm3 = p0 (copy)
+ paddw mm0, mm6 ; mm0 = r0
+ movq mm1, mm2 ; mm1 = p1 (copy)
+ paddw mm1, mm5 ; mm1 = r1
+ psubw mm2, mm5 ; mm2 = r2, mm5 available
+ psubw mm3, mm6 ; mm3 = r3
+
+
+; --- 4x4 iDCT done, now time to combine with mpr ---
+
+ movq mm7, MMWORD PTR const32 ; first four words of const32
+
+ paddw mm0, mm7 ; rres + 32
+ psraw mm0, 6 ; (rres + 32) >> 6
+ paddw mm1, mm7 ; rres + 32
+ psraw mm1, 6 ; (rres + 32) >> 6
+ paddw mm2, mm7 ; rres + 32
+ psraw mm2, 6 ; (rres + 32) >> 6
+ paddw mm3, mm7 ; rres + 32
+ psraw mm3, 6 ; (rres + 32) >> 6
+
+ pxor mm7, mm7
+
+ ; convert mpr from unsigned char to short
+ movd mm4, DWORD PTR 0[ecx]
+ movd mm5, DWORD PTR 16[ecx]
+ movd mm6, DWORD PTR 32[ecx]
+ punpcklbw mm4, mm7
+ punpcklbw mm5, mm7
+ punpcklbw mm6, mm7
+ paddsw mm4, mm0 ; pred_row + rres_row
+ movd mm0, DWORD PTR 48[ecx] ; reuse mm0 for mpr[3]
+ paddsw mm5, mm1 ; pred_row + rres_row
+ punpcklbw mm0, mm7
+ paddsw mm6, mm2 ; pred_row + rres_row
+ paddsw mm0, mm3 ; pred_row + rres_row
+ ; results in mm4, mm5, mm6, mm0
+
+ ; move back to 8 bit
+ packuswb mm4, mm7 ; unsigned saturation clamps each sum to 0..255
+ packuswb mm5, mm7
+ packuswb mm6, mm7
+ packuswb mm0, mm7
+ movd DWORD PTR 0[edx], mm4 ; four output rows, 16 bytes apart
+ movd DWORD PTR 16[edx], mm5
+ movd DWORD PTR 32[edx], mm6
+ movd DWORD PTR 48[edx], mm0
+ ret 0
+
+_itrans4x4_mmx_direct ENDP
+_itrans4x4_mmx ENDP
+_TEXT ENDS
+
+EXTRN _itrans_sp:PROC ; called below with currMB in eax plus five stack arguments
+EXTRN _Inv_Residual_trans_4x4:PROC ; called below with four stack arguments
+PUBLIC _iMBtrans4x4
+EXTRN _opt:BYTE ; table from which the 16x16 copy routine pointer is fetched
+_TEXT SEGMENT
+_currSlice$ = -4 ; size = 4
+_mb_rec$166704 = 8 ; size = 4
+_currMB$ = 8 ; size = 4
+_curr_img$ = 12 ; size = 4
+_pl$ = 8 ; second parameter
+_smb$ = 16 ; size = 4
+_iMBtrans4x4 PROC ; stack args in order: currMB, pl, smb. Reconstructs all sixteen 4x4 blocks of one plane into mb_rec, then copies the 16x16 result out.
+ push ecx ; reserves the 4-byte slot addressed as _currSlice$
+ push ebx
+ push ebp
+ push esi
+STACKOFFSET = 16
+; 408 : VideoImage *curr_img = pl ? dec_picture->imgUV[pl - 1]: dec_picture->imgY;
+
+ mov esi, DWORD PTR _pl$[esp+STACKOFFSET] ; esi = pl for the rest of the routine
+ push edi
+STACKOFFSET = STACKOFFSET + 4
+ mov edi, DWORD PTR _currMB$[esp+16] ; edi = currMB for the rest of the routine
+ mov ebp, DWORD PTR [edi+p_Slice@MacroBlock] ; ebp: currMB->p_Slice
+ mov eax, DWORD PTR [edi+4] ; pointer at currMB+4, used as the VideoParameters base on the next line
+ mov eax, DWORD PTR [eax+dec_picture@VideoParameters] ; eax: p_Vid->dec_picture;
+ mov DWORD PTR _currSlice$[esp+20], ebp
+ mov ecx, DWORD PTR [eax+esi*4+plane_images@StorablePicture]
+ mov DWORD PTR _curr_img$[esp+16], ecx ; kept in the incoming pl argument slot (pl itself stays in esi)
+
+ cmp DWORD PTR _smb$[esp+16], 0 ; if (smb)
+; 413 : {
+; 414 : h264_short_block_t *blocks = currSlice->cof4[pl];
+; 415 : const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];
+; 416 :
+; 417 : itrans_sp(blocks[0], mb_pred, currMB, pl, 0, 0);
+
+ je $LN4@iMBtrans4x
+ push 0
+ push 0
+ mov eax, esi
+ shl eax, 9
+ lea ebx, DWORD PTR [eax+ebp+cof@Slice] ; ebx = blocks (32 bytes per 4x4 block)
+ mov ecx, esi
+ shl ecx, 8
+ lea ebp, DWORD PTR [ecx+ebp+mb_pred@Slice] ; ebp now holds mb_pred; currSlice is reloaded from the stack before COPY_16x16
+ push esi
+ push ebp
+ push ebx
+ mov eax, edi
+ call _itrans_sp
+
+; 418 : itrans_sp(blocks[1], mb_pred, currMB, pl, 4, 0);
+
+ push 0
+ push 4
+ push esi
+ lea edx, DWORD PTR [ebx+32]
+ push ebp
+ push edx
+ mov eax, edi
+ call _itrans_sp
+
+; 419 : itrans_sp(blocks[2], mb_pred, currMB, pl, 0, 4);
+
+ push 4
+ push 0
+ push esi
+ lea eax, DWORD PTR [ebx+64]
+ push ebp
+ push eax
+ mov eax, edi
+ call _itrans_sp
+
+; 420 : itrans_sp(blocks[3], mb_pred, currMB, pl, 4, 4);
+
+ push 4
+ push 4
+ push esi
+ lea ecx, DWORD PTR [ebx+96]
+ push ebp
+ push ecx
+ mov eax, edi
+ call _itrans_sp
+ add esp, 80 ; 00000050H (arguments of the last four calls: 4 x 5 dwords)
+
+; 421 : itrans_sp(blocks[4], mb_pred, currMB, pl, 8, 0);
+
+ push 0
+ push 8
+ push esi
+ lea edx, DWORD PTR [ebx+128]
+ push ebp
+ push edx
+ mov eax, edi
+ call _itrans_sp
+
+; 422 : itrans_sp(blocks[5], mb_pred, currMB, pl, 12, 0);
+
+ push 0
+ push 12 ; 0000000cH
+ push esi
+ lea eax, DWORD PTR [ebx+160]
+ push ebp
+ push eax
+ mov eax, edi
+ call _itrans_sp
+
+; 423 : itrans_sp(blocks[6], mb_pred, currMB, pl, 8, 4);
+
+ push 4
+ push 8
+ push esi
+ lea ecx, DWORD PTR [ebx+192]
+ push ebp
+ push ecx
+ mov eax, edi
+ call _itrans_sp
+
+; 424 : itrans_sp(blocks[7], mb_pred, currMB, pl, 12, 4);
+
+ push 4
+ push 12 ; 0000000cH
+ push esi
+ lea edx, DWORD PTR [ebx+224]
+ push ebp
+ push edx
+ mov eax, edi
+ call _itrans_sp
+ add esp, 80 ; 00000050H
+
+; 425 : itrans_sp(blocks[8], mb_pred, currMB, pl, 0, 8);
+
+ push 8
+ push 0
+ push esi
+ lea eax, DWORD PTR [ebx+256]
+ push ebp
+ push eax
+ mov eax, edi
+ call _itrans_sp
+
+; 426 : itrans_sp(blocks[9], mb_pred, currMB, pl, 4, 8);
+
+ push 8
+ push 4
+ push esi
+ push ebp
+ lea ecx, DWORD PTR [ebx+288]
+ push ecx
+ mov eax, edi
+ call _itrans_sp
+
+; 427 : itrans_sp(blocks[10], mb_pred, currMB, pl, 0, 12);
+
+ push 12 ; 0000000cH
+ push 0
+ push esi
+ lea edx, DWORD PTR [ebx+320]
+ push ebp
+ push edx
+ mov eax, edi
+ call _itrans_sp
+
+; 428 : itrans_sp(blocks[11], mb_pred, currMB, pl, 4, 12);
+
+ push 12 ; 0000000cH
+ push 4
+ push esi
+ lea eax, DWORD PTR [ebx+352]
+ push ebp
+ push eax
+ mov eax, edi
+ call _itrans_sp
+ add esp, 80 ; 00000050H
+
+; 429 : itrans_sp(blocks[12], mb_pred, currMB, pl, 8, 8);
+
+ push 8
+ push 8
+ push esi
+ lea ecx, DWORD PTR [ebx+384]
+ push ebp
+ push ecx
+ mov eax, edi
+ call _itrans_sp
+
+; 430 : itrans_sp(blocks[13], mb_pred, currMB, pl, 12, 8);
+
+ push 8
+ push 12 ; 0000000cH
+ push esi
+ lea edx, DWORD PTR [ebx+416]
+ push ebp
+ push edx
+ mov eax, edi
+ call _itrans_sp
+
+; 431 : itrans_sp(blocks[14], mb_pred, currMB, pl, 8, 12);
+
+ push 12 ; 0000000cH
+ push 8
+ push esi
+ lea eax, DWORD PTR [ebx+448]
+ push ebp
+ push eax
+ mov eax, edi
+ call _itrans_sp
+
+; 432 : itrans_sp(blocks[15], mb_pred, currMB, pl, 12, 12);
+
+ push 12 ; 0000000cH
+ push 12 ; 0000000cH
+ push esi
+ add ebx, 480 ; 000001e0H
+ push ebp
+ push ebx
+ mov eax, edi
+ call _itrans_sp
+ mov ebp, DWORD PTR _currSlice$[esp+100] ; reload currSlice; the +100 accounts for the 80 argument bytes still on the stack
+ add esp, 80 ; 00000050H
+ jmp COPY_16x16
+
+$LN4@iMBtrans4x:
+
+; 433 : }
+; 434 : else if (currMB->is_lossless)
+
+ cmp DWORD PTR [edi+84], 0 ; field at currMB+84 (is_lossless, per the listing line above)
+ je $LN2@iMBtrans4x
+
+ push 0
+ push 0
+
+; 435 : {
+; 436 : Inv_Residual_trans_4x4(currMB, pl, 0, 0);
+
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 437 : Inv_Residual_trans_4x4(currMB, pl, 4, 0);
+
+ push 0
+ push 4
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 438 : Inv_Residual_trans_4x4(currMB, pl, 0, 4);
+
+ push 4
+ push 0
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 439 : Inv_Residual_trans_4x4(currMB, pl, 4, 4);
+
+ push 4
+ push 4
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+ add esp, 64 ; 00000040H (arguments of the last four calls: 4 x 4 dwords)
+
+; 440 : Inv_Residual_trans_4x4(currMB, pl, 8, 0);
+
+ push 0
+ push 8
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 441 : Inv_Residual_trans_4x4(currMB, pl, 12, 0);
+
+ push 0
+ push 12 ; 0000000cH
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 442 : Inv_Residual_trans_4x4(currMB, pl, 8, 4);
+
+ push 4
+ push 8
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 443 : Inv_Residual_trans_4x4(currMB, pl, 12, 4);
+
+ push 4
+ push 12 ; 0000000cH
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+ add esp, 64 ; 00000040H
+
+; 444 : Inv_Residual_trans_4x4(currMB, pl, 0, 8);
+
+ push 8
+ push 0
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 445 : Inv_Residual_trans_4x4(currMB, pl, 4, 8);
+
+ push 8
+ push 4
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 446 : Inv_Residual_trans_4x4(currMB, pl, 0, 12);
+
+ push 12 ; 0000000cH
+ push 0
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 447 : Inv_Residual_trans_4x4(currMB, pl, 4, 12);
+
+ push 12 ; 0000000cH
+ push 4
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+ add esp, 64 ; 00000040H
+
+; 448 : Inv_Residual_trans_4x4(currMB, pl, 8, 8);
+
+ push 8
+ push 8
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 449 : Inv_Residual_trans_4x4(currMB, pl, 12, 8);
+
+ push 8
+ push 12 ; 0000000cH
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 450 : Inv_Residual_trans_4x4(currMB, pl, 8, 12);
+
+ push 12 ; 0000000cH
+ push 8
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+
+; 451 : Inv_Residual_trans_4x4(currMB, pl, 12, 12);
+
+ push 12 ; 0000000cH
+ push 12 ; 0000000cH
+ push esi
+ push edi
+ call _Inv_Residual_trans_4x4
+ add esp, 64 ; 00000040H
+
+; 452 : }
+; 453 : else
+
+ jmp COPY_16x16
+$LN2@iMBtrans4x:
+
+; 454 : {
+; 455 : const h264_short_block_t *blocks = currSlice->cof4[pl];
+; 456 : const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl];
+
+ mov edx, esi
+ mov ecx, esi
+ shl edx, 8
+ shl ecx, 9
+ lea eax, DWORD PTR [edx+ebp]
+ lea ebx, DWORD PTR [ecx+ebp+cof@Slice]
+
+; 457 : h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl];
+
+ ; put things in registers that itrans4x4_mmx_direct wants
+ lea edx, [eax + mb_rec@Slice]; mb_rec
+ lea ecx, [eax + mb_pred@Slice] ; mb_pred
+ mov eax, ebx ; blocks
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0);
+
+ lea edx, [edx+4] ; next 4x4 block to the right (ecx/edx survive the call unchanged)
+ lea ecx, [ecx+4]
+ lea eax, [ebx+32]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+128]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[4], mb_pred, mb_rec, 8, 0);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+160]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[5], mb_pred, mb_rec, 12, 0);
+
+ ; second row
+ lea edx, [edx+52] ; 12 already added + 52 = 64 bytes = four 16-byte rows down, back at x = 0
+ lea ecx, [ecx+52]
+ lea eax, [ebx+64]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+96]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+192]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[6], mb_pred, mb_rec, 8, 4);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+224]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[7], mb_pred, mb_rec, 12, 4);
+
+ ; third row
+ lea edx, [edx+52]
+ lea ecx, [ecx+52]
+ lea eax, [ebx+256]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+288]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+384]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[12], mb_pred, mb_rec, 8, 8);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+416]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[13], mb_pred, mb_rec, 12, 8);
+
+ ; fourth row
+ lea edx, [edx+52]
+ lea ecx, [ecx+52]
+ lea eax, [ebx+320]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+352]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+448]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[14], mb_pred, mb_rec, 8, 12);
+
+ lea edx, [edx+4]
+ lea ecx, [ecx+4]
+ lea eax, [ebx+480]
+ call _itrans4x4_mmx_direct ; opt_itrans4x4(blocks[15], mb_pred, mb_rec, 12, 12);
+COPY_16x16:
+
+; construct picture from 4x4 blocks
+; opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, currSlice->mb_rec[pl]);
+
+ mov eax, DWORD PTR [edi+40] ; third argument below (pix_y, per the call shown above)
+ mov ecx, DWORD PTR [edi+36] ; second argument below (pix_x, per the call shown above)
+ shl esi, 8
+ lea edx, DWORD PTR [esi+ebp+mb_rec@Slice] ; ebp must be currSlice again on every path reaching here
+ push edx
+ mov edx, DWORD PTR _curr_img$[esp+20] ; +20 instead of +16 because one argument is already pushed
+ push eax
+ push ecx
+ push edx
+ call DWORD PTR _opt+copy_image_data_16x16_stride@OptimizedFunctions
+ add esp, 16 ; 00000010H
+ pop edi
+ pop esi
+ pop ebp
+ pop ebx
+ pop ecx
+ ret 0
+_iMBtrans4x4 ENDP
+_TEXT ENDS
+
+_TEXT SEGMENT
+ ALIGN 2
+ PUBLIC _itrans8x8_sse2
+_itrans8x8_sse2 PROC NEAR
+; parameter 1(mb_rec): 8 + ebp
+; parameter 2(mb_pred): 12 + ebp
+; parameter 3(block): 16 + ebp
+; parameter 4(pos_x): 20 + ebp
+ push ebp
+ mov ebp, esp
+ and esp, -16
+ sub esp, 176
+ mov edx, DWORD PTR [ebp+20]
+ mov ecx, DWORD PTR [ebp+8] ; ecx: mb_rec
+ add ecx, edx
+ add edx, DWORD PTR [ebp+12] ; edx: mb_pred
+ mov eax, DWORD PTR [ebp+16] ; eax: block
+
+;;; __m128i a0, a1, a2, a3;
+;;; __m128i p0, p1, p2, p3, p4, p5 ,p6, p7;
+;;; __m128i b0, b1, b2, b3, b4, b5, b6, b7;
+;;; __m128i r0, r1, r2, r3, r4, r5, r6, r7;
+;;; __m128i const32, zero;
+;;; __declspec(align(32)) static const int16_t c32[8] = {32, 32, 32, 32, 32, 32, 32, 32};
+;;; __m128i pred0, pred1;
+;;;
+;;; const32 = _mm_load_si128((const __m128i *)c32);
+
+ movdqa xmm0, XMMWORD PTR const32
+
+;;; zero = _mm_setzero_si128();
+;;;
+;;; // Horizontal
+;;; b0 = _mm_load_si128((__m128i *)(block[0]));
+
+ movdqa xmm4, XMMWORD PTR [eax]
+
+;;; b1 = _mm_load_si128((__m128i *)(block[1]));
+
+ movdqa xmm7, XMMWORD PTR [eax+16]
+
+;;; b2 = _mm_load_si128((__m128i *)(block[2]));
+
+ movdqa xmm5, XMMWORD PTR [eax+32]
+
+;;; b3 = _mm_load_si128((__m128i *)(block[3]));
+
+ movdqa xmm3, XMMWORD PTR [eax+48]
+
+;;; b4 = _mm_load_si128((__m128i *)(block[4]));
+
+ movdqa xmm6, XMMWORD PTR [eax+64]
+
+;;; b5 = _mm_load_si128((__m128i *)(block[5]));
+;;; b6 = _mm_load_si128((__m128i *)(block[6]));
+
+ movdqa xmm1, XMMWORD PTR [eax+96]
+
+;;; b7 = _mm_load_si128((__m128i *)(block[7]));
+
+ movdqa xmm2, XMMWORD PTR [eax+112]
+ movdqa XMMWORD PTR [esp], xmm0
+ movdqa xmm0, XMMWORD PTR [eax+80]
+ movdqa XMMWORD PTR [esp+16], xmm2
+
+;;;
+;;; /* rotate 8x8 (ugh) */
+;;; r0 = _mm_unpacklo_epi16(b0, b2);
+
+ movdqa xmm2, xmm4
+ punpcklwd xmm2, xmm5
+
+;;; r1 = _mm_unpacklo_epi16(b1, b3);
+;;; r2 = _mm_unpackhi_epi16(b0, b2);
+
+ punpckhwd xmm4, xmm5
+
+;;; r3 = _mm_unpackhi_epi16(b1, b3);
+;;; r4 = _mm_unpacklo_epi16(b4, b6);
+;;; r5 = _mm_unpacklo_epi16(b5, b7);
+
+ movdqa xmm5, xmm0
+ movdqa XMMWORD PTR [esp+32], xmm2
+ movdqa xmm2, xmm7
+ punpcklwd xmm2, xmm3
+ punpckhwd xmm7, xmm3
+ movdqa xmm3, xmm6
+ punpcklwd xmm3, xmm1
+ movdqa XMMWORD PTR [esp+48], xmm3
+ movdqa xmm3, XMMWORD PTR [esp+16]
+ punpcklwd xmm5, xmm3
+
+;;; r6 = _mm_unpackhi_epi16(b4, b6);
+
+ punpckhwd xmm6, xmm1
+;;; r7 = _mm_unpackhi_epi16(b5, b7);
+
+ punpckhwd xmm0, xmm3
+
+;;;
+;;; b0 = _mm_unpacklo_epi16(r0, r1);
+
+ movdqa xmm3, XMMWORD PTR [esp+32]
+ movdqa xmm1, xmm3
+ punpcklwd xmm1, xmm2
+
+;;; b1 = _mm_unpackhi_epi16(r0, r1);
+
+ punpckhwd xmm3, xmm2
+
+;;; b2 = _mm_unpacklo_epi16(r2, r3);
+
+ movdqa xmm2, xmm4
+ punpcklwd xmm2, xmm7
+
+;;; b3 = _mm_unpackhi_epi16(r2, r3);
+
+ punpckhwd xmm4, xmm7
+ movdqa XMMWORD PTR [esp+64], xmm4
+
+;;; b4 = _mm_unpacklo_epi16(r4, r5);
+
+ movdqa xmm4, XMMWORD PTR [esp+48]
+ movdqa xmm7, xmm4
+ punpcklwd xmm7, xmm5
+
+;;; b5 = _mm_unpackhi_epi16(r4, r5);
+
+ punpckhwd xmm4, xmm5
+
+;;; b6 = _mm_unpacklo_epi16(r6, r7);
+
+ movdqa xmm5, xmm6
+ punpcklwd xmm5, xmm0
+
+;;; b7 = _mm_unpackhi_epi16(r6, r7);
+
+ punpckhwd xmm6, xmm0
+
+;;;
+;;; p0 = _mm_unpacklo_epi64(b0, b4);
+
+ movdqa xmm0, xmm1
+ punpcklqdq xmm0, xmm7
+
+;;; p1 = _mm_unpackhi_epi64(b0, b4);
+
+ punpckhqdq xmm1, xmm7
+ movdqa XMMWORD PTR [esp+16], xmm1
+
+;;; p2 = _mm_unpacklo_epi64(b1, b5);
+
+ movdqa xmm1, xmm3
+ punpcklqdq xmm1, xmm4
+
+;;; p3 = _mm_unpackhi_epi64(b1, b5);
+;;; p4 = _mm_unpacklo_epi64(b2, b6);
+;;; p5 = _mm_unpackhi_epi64(b2, b6);
+;;; p6 = _mm_unpacklo_epi64(b3, b7);
+;;; p7 = _mm_unpackhi_epi64(b3, b7);
+;;;
+;;; /* perform approx DCT */
+;;; a0 = _mm_add_epi16(p0, p4); // p0 + p4
+;;; a1 = _mm_sub_epi16(p0, p4); // p0 - p4
+;;; r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
+
+ movdqa xmm7, xmm1
+ psraw xmm7, 1
+ punpckhqdq xmm3, xmm4
+ movdqa XMMWORD PTR [esp+32], xmm3
+ movdqa xmm3, xmm2
+ punpcklqdq xmm3, xmm5
+ punpckhqdq xmm2, xmm5
+ movdqa xmm5, XMMWORD PTR [esp+64]
+ movdqa xmm4, xmm5
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm6, xmm0
+ paddw xmm6, xmm3
+ psubw xmm0, xmm3
+
+;;; a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
+
+ movdqa xmm3, xmm4
+
+;;; r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
+
+ psraw xmm4, 1
+ psubw xmm3, xmm7
+
+;;; a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)
+
+ paddw xmm1, xmm4
+
+;;;
+;;; b0 = _mm_add_epi16(a0, a3); // a0 + a3;
+
+ movdqa xmm4, xmm6
+
+;;; b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
+
+ movdqa xmm7, xmm0
+ paddw xmm4, xmm1
+ psubw xmm7, xmm3
+ movdqa XMMWORD PTR [esp+48], xmm7
+
+;;; b4 = _mm_add_epi16(a1, a2); // a1 + a2;
+
+ paddw xmm0, xmm3
+ movdqa XMMWORD PTR [esp+80], xmm0
+
+;;; b6 = _mm_sub_epi16(a0, a3); // a0 - a3;
+;;;
+;;; //-p3 + p5 - p7 - (p7 >> 1);
+;;; r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
+;;; a0 = _mm_sub_epi16(p5, p3); // p5 - p3
+
+ movdqa xmm0, XMMWORD PTR [esp+32]
+ psubw xmm6, xmm1
+ movdqa xmm1, xmm5
+ psraw xmm1, 1
+ movdqa xmm3, xmm2
+
+;;; a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
+;;; a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)
+;;;
+;;; //p1 + p7 - p3 - (p3 >> 1);
+;;; r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
+
+ movdqa xmm7, xmm0
+ movdqa XMMWORD PTR [esp+96], xmm6
+
+;;; a1 = _mm_add_epi16(p1, p7); // p1 + p7
+
+ movdqa xmm6, XMMWORD PTR [esp+16]
+ psubw xmm3, xmm0
+ psubw xmm3, xmm5
+ psraw xmm7, 1
+ psubw xmm3, xmm1
+ movdqa xmm1, xmm6
+ paddw xmm1, xmm5
+
+;;; a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
+
+ psubw xmm1, xmm0
+
+;;; a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)
+
+ psubw xmm1, xmm7
+
+;;;
+;;; // -p1 + p7 + p5 + (p5 >> 1);
+;;; r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
+
+ movdqa xmm7, xmm2
+ psraw xmm7, 1
+
+;;; a2 = _mm_sub_epi16(p7, p1); // p7 - p1
+
+ psubw xmm5, xmm6
+
+;;; a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
+
+ paddw xmm5, xmm2
+
+;;; a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)
+
+ paddw xmm5, xmm7
+
+;;;
+;;; // p3 + p5 + p1 + (p1 >> 1);
+;;; a3 = _mm_add_epi16(p3, p5); // p3+p5
+
+ paddw xmm0, xmm2
+
+;;; a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
+;;; p1 = _mm_srai_epi16(p1, 1); // p1 >> 1
+;;; a3 = _mm_add_epi16(a3, p1); //p3 + p5 + p1 + (p1 >> 1)
+;;;
+;;; r0 = _mm_srai_epi16(a3, 2); // a3>>2
+;;; b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
+;;; r0 = _mm_srai_epi16(a2, 2); // a2>>2
+;;; b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
+;;; a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
+;;; b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
+;;; a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
+;;; b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);
+;;;
+;;; p0 = _mm_add_epi16(b0, b7); // b0 + b7;
+;;; p1 = _mm_sub_epi16(b2, b5); // b2 - b5;
+;;; p2 = _mm_add_epi16(b4, b3); // b4 + b3;
+;;; p3 = _mm_add_epi16(b6, b1); // b6 + b1;
+
+ movdqa xmm2, XMMWORD PTR [esp+96]
+ paddw xmm0, xmm6
+ psraw xmm6, 1
+ paddw xmm0, xmm6
+ movdqa xmm7, xmm0
+ movdqa xmm6, xmm5
+ psraw xmm7, 2
+ paddw xmm7, xmm3
+ psraw xmm6, 2
+ paddw xmm6, xmm1
+ psraw xmm1, 2
+ psubw xmm5, xmm1
+ movdqa xmm1, xmm4
+ psraw xmm3, 2
+ psubw xmm0, xmm3
+ movdqa xmm3, XMMWORD PTR [esp+80]
+ movdqa XMMWORD PTR [esp+32], xmm0
+
+;;; p4 = _mm_sub_epi16(b6, b1); // b6 - b1;
+;;; p5 = _mm_sub_epi16(b4, b3); // b4 - b3;
+;;; p6 = _mm_add_epi16(b2, b5); // b2 + b5;
+;;; p7 = _mm_sub_epi16(b0, b7); // b0 - b7;
+
+ psubw xmm4, XMMWORD PTR [esp+32]
+ paddw xmm1, xmm0
+ movdqa XMMWORD PTR [esp+112], xmm1
+ movdqa xmm1, XMMWORD PTR [esp+48]
+ movdqa xmm0, xmm1
+ psubw xmm0, xmm5
+ movdqa XMMWORD PTR [esp+16], xmm0
+ movdqa xmm0, xmm3
+ paddw xmm0, xmm6
+ psubw xmm3, xmm6
+ movdqa XMMWORD PTR [esp+128], xmm0
+
+;;;
+;;; /* rotate 8x8 (ugh) */
+;;; r0 = _mm_unpacklo_epi16(p0, p2);
+
+ movdqa xmm6, XMMWORD PTR [esp+128]
+ movdqa xmm0, xmm2
+ paddw xmm0, xmm7
+ psubw xmm2, xmm7
+ paddw xmm1, xmm5
+ movdqa xmm5, XMMWORD PTR [esp+112]
+ movdqa XMMWORD PTR [esp+144], xmm4
+ movdqa xmm4, xmm5
+ punpcklwd xmm4, xmm6
+
+;;; r1 = _mm_unpacklo_epi16(p1, p3);
+;;; r2 = _mm_unpackhi_epi16(p0, p2);
+
+ punpckhwd xmm5, xmm6
+
+;;; r3 = _mm_unpackhi_epi16(p1, p3);
+;;; r4 = _mm_unpacklo_epi16(p4, p6);
+;;; r5 = _mm_unpacklo_epi16(p5, p7);
+
+ movdqa xmm6, xmm3
+ movdqa XMMWORD PTR [esp+64], xmm4
+ movdqa xmm4, XMMWORD PTR [esp+16]
+ movdqa xmm7, xmm4
+ punpcklwd xmm7, xmm0
+ punpckhwd xmm4, xmm0
+ movdqa xmm0, xmm2
+ punpcklwd xmm0, xmm1
+ movdqa XMMWORD PTR [esp+128], xmm0
+ movdqa xmm0, XMMWORD PTR [esp+144]
+ punpcklwd xmm6, xmm0
+
+;;; r6 = _mm_unpackhi_epi16(p4, p6);
+
+ punpckhwd xmm2, xmm1
+
+;;; r7 = _mm_unpackhi_epi16(p5, p7);
+;;;
+;;; b0 = _mm_unpacklo_epi16(r0, r1);
+
+ movdqa xmm1, XMMWORD PTR [esp+64]
+ punpckhwd xmm3, xmm0
+ movdqa xmm0, xmm1
+ punpcklwd xmm0, xmm7
+
+;;; b1 = _mm_unpackhi_epi16(r0, r1);
+
+ punpckhwd xmm1, xmm7
+
+;;; b2 = _mm_unpacklo_epi16(r2, r3);
+
+ movdqa xmm7, xmm5
+ punpcklwd xmm7, xmm4
+
+;;; b3 = _mm_unpackhi_epi16(r2, r3);
+
+ punpckhwd xmm5, xmm4
+ movdqa XMMWORD PTR [esp+112], xmm5
+
+;;; b4 = _mm_unpacklo_epi16(r4, r5);
+
+ movdqa xmm5, XMMWORD PTR [esp+128]
+ movdqa xmm4, xmm5
+ punpcklwd xmm4, xmm6
+
+;;; b5 = _mm_unpackhi_epi16(r4, r5);
+
+ punpckhwd xmm5, xmm6
+
+;;; b6 = _mm_unpacklo_epi16(r6, r7);
+
+ movdqa xmm6, xmm2
+ punpcklwd xmm6, xmm3
+
+;;; b7 = _mm_unpackhi_epi16(r6, r7);
+
+ punpckhwd xmm2, xmm3
+
+;;;
+;;; p0 = _mm_unpacklo_epi64(b0, b4);
+
+ movdqa xmm3, xmm0
+ punpcklqdq xmm3, xmm4
+
+;;; p1 = _mm_unpackhi_epi64(b0, b4);
+
+ punpckhqdq xmm0, xmm4
+ movdqa XMMWORD PTR [esp+144], xmm0
+
+;;; p2 = _mm_unpacklo_epi64(b1, b5);
+;;; p3 = _mm_unpackhi_epi64(b1, b5);
+;;; p4 = _mm_unpacklo_epi64(b2, b6);
+;;; p5 = _mm_unpackhi_epi64(b2, b6);
+;;; p6 = _mm_unpacklo_epi64(b3, b7);
+
+ movdqa xmm0, XMMWORD PTR [esp+112]
+ movdqa xmm4, xmm1
+ punpcklqdq xmm4, xmm5
+ punpckhqdq xmm1, xmm5
+ movdqa XMMWORD PTR [esp+64], xmm1
+ movdqa xmm1, xmm7
+ movdqa xmm5, xmm0
+ punpcklqdq xmm1, xmm6
+ punpckhqdq xmm7, xmm6
+
+;;; p7 = _mm_unpackhi_epi64(b3, b7);
+;;;
+;;;
+;;; /* Vertical */
+;;;
+;;; a0 = _mm_add_epi16(p0, p4); // p0 + p4
+;;; a1 = _mm_sub_epi16(p0, p4); // p0 - p4
+;;; r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
+
+ movdqa xmm6, xmm4
+ psraw xmm6, 1
+ punpcklqdq xmm5, xmm2
+ punpckhqdq xmm0, xmm2
+ movdqa xmm2, xmm3
+ paddw xmm2, xmm1
+ psubw xmm3, xmm1
+
+;;; a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
+
+ movdqa xmm1, xmm5
+
+;;; r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
+
+ psraw xmm5, 1
+ psubw xmm1, xmm6
+
+;;; a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)
+
+ paddw xmm4, xmm5
+
+;;;
+;;; b0 = _mm_add_epi16(a0, a3); // a0 + a3;
+
+ movdqa xmm5, xmm2
+
+;;; b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
+
+ movdqa xmm6, xmm3
+ paddw xmm5, xmm4
+ psubw xmm6, xmm1
+ movdqa XMMWORD PTR [esp+128], xmm6
+
+;;; b4 = _mm_add_epi16(a1, a2); // a1 + a2;
+;;; b6 = _mm_sub_epi16(a0, a3); // a0 - a3;
+;;;
+;;; //-p3 + p5 - p7 - (p7 >> 1);
+;;; r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
+;;; a0 = _mm_sub_epi16(p5, p3); // p5 - p3
+
+ movdqa xmm6, XMMWORD PTR [esp+64]
+ paddw xmm3, xmm1
+ movdqa XMMWORD PTR [esp+80], xmm3
+ psubw xmm2, xmm4
+ movdqa xmm1, xmm0
+ psraw xmm1, 1
+ movdqa xmm3, xmm7
+ movdqa XMMWORD PTR [esp+96], xmm2
+ psubw xmm3, xmm6
+
+;;; a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
+
+ psubw xmm3, xmm0
+
+;;; a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)
+;;;
+;;; //p1 + p7 - p3 - (p3 >> 1);
+;;; r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
+
+ movdqa xmm2, xmm6
+ psraw xmm2, 1
+ psubw xmm3, xmm1
+
+;;; a1 = _mm_add_epi16(p1, p7); // p1 + p7
+
+ movdqa xmm1, XMMWORD PTR [esp+144]
+ movdqa xmm4, xmm1
+ paddw xmm4, xmm0
+
+;;; a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
+
+ psubw xmm4, xmm6
+
+;;; a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)
+
+ psubw xmm4, xmm2
+
+;;;
+;;; // -p1 + p7 + p5 + (p5 >> 1);
+;;; r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
+
+ movdqa xmm2, xmm7
+ psraw xmm2, 1
+
+;;; a2 = _mm_sub_epi16(p7, p1); // p7 - p1
+
+ psubw xmm0, xmm1
+
+;;; a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
+
+ paddw xmm0, xmm7
+
+;;; a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)
+
+ paddw xmm0, xmm2
+
+;;;
+;;; // p3 + p5 + p1 + (p1 >> 1);
+;;; r0 = _mm_srai_epi16(p1, 1); // p1 >> 1
+
+ movdqa xmm2, xmm1
+ psraw xmm2, 1
+
+;;; a3 = _mm_add_epi16(p3, p5); // p3+p5
+
+ paddw xmm6, xmm7
+
+;;; a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
+;;; a3 = _mm_add_epi16(a3, r0); //p3 + p5 + p1 + (p1 >> 1)
+;;;
+;;; r0 = _mm_srai_epi16(a3, 2); // a3>>2
+;;; b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
+;;; r0 = _mm_srai_epi16(a2, 2); // a2>>2
+;;; b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
+;;; a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
+;;; b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
+;;; a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
+;;; b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);
+;;;
+;;; r0 = _mm_add_epi16(b0, b7); // b0 + b7;
+;;; r1 = _mm_sub_epi16(b2, b5); // b2 - b5;
+
+ movdqa xmm7, XMMWORD PTR [esp+128]
+ paddw xmm6, xmm1
+ paddw xmm6, xmm2
+ movdqa xmm1, xmm6
+ psraw xmm1, 2
+ movdqa xmm2, xmm0
+ paddw xmm1, xmm3
+ psraw xmm2, 2
+ paddw xmm2, xmm4
+ psraw xmm4, 2
+ psubw xmm0, xmm4
+ psraw xmm3, 2
+ psubw xmm6, xmm3
+ movdqa XMMWORD PTR [esp+64], xmm6
+ movdqa xmm3, xmm5
+
+;;; r2 = _mm_add_epi16(b4, b3); // b4 + b3;
+;;; r3 = _mm_add_epi16(b6, b1); // b6 + b1;
+;;; r4 = _mm_sub_epi16(b6, b1); // b6 - b1;
+;;; r5 = _mm_sub_epi16(b4, b3); // b4 - b3;
+;;; r6 = _mm_add_epi16(b2, b5); // b2 + b5;
+;;; r7 = _mm_sub_epi16(b0, b7); // b0 - b7;
+
+ psubw xmm5, XMMWORD PTR [esp+64]
+ paddw xmm3, xmm6
+ movdqa XMMWORD PTR [esp+144], xmm3
+ movdqa xmm3, xmm7
+ psubw xmm3, xmm0
+ movdqa XMMWORD PTR [esp+48], xmm3
+ movdqa xmm3, XMMWORD PTR [esp+80]
+ movdqa xmm4, xmm3
+ paddw xmm4, xmm2
+ psubw xmm3, xmm2
+
+;;;
+;;;
+;;; // add in prediction values
+;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[0][pos_x]));
+;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[1][pos_x]));
+;;; // (x + 32) >> 6
+;;; r0 = _mm_adds_epi16(r0, const32);
+
+ movdqa xmm2, XMMWORD PTR const32
+ movdqa XMMWORD PTR [esp+16], xmm4
+ movdqa xmm4, XMMWORD PTR [esp+96]
+ movdqa xmm6, xmm4
+ paddw xmm6, xmm1
+ psubw xmm4, xmm1
+
+;;; r0 = _mm_srai_epi16(r0, 6);
+;;; r1 = _mm_adds_epi16(r1, const32);
+
+ movdqa xmm1, XMMWORD PTR [esp+48]
+ paddw xmm7, xmm0
+ movdqa xmm0, XMMWORD PTR [esp+144]
+ movdqa XMMWORD PTR [esp+128], xmm7
+
+;;; r1 = _mm_srai_epi16(r1, 6);
+;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+
+ movq xmm7, QWORD PTR [edx+16]
+ movdqa XMMWORD PTR [esp+32], xmm5
+ paddsw xmm0, xmm2
+ psraw xmm0, 6
+ paddsw xmm1, xmm2
+ pxor xmm2, xmm2
+ punpcklbw xmm7, xmm2
+ movq xmm5, QWORD PTR [edx]
+ punpcklbw xmm5, xmm2
+ psraw xmm1, 6
+
+;;; pred0 = _mm_adds_epi16(pred0, r0);
+;;; pred1 = _mm_adds_epi16(pred1, r1);
+
+ paddsw xmm7, xmm1
+ paddsw xmm5, xmm0
+
+;;;
+;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+
+ packuswb xmm5, xmm7
+
+;;;
+;;; // store
+;;; _mm_storel_epi64((__m128i *)(&mb_rec[0][pos_x]), pred0);
+
+ movdqa xmm0, XMMWORD PTR [esp+32]
+ movdqa xmm2, XMMWORD PTR [esp+128]
+ movq QWORD PTR [ecx], xmm5
+
+;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+;;; pred0 = _mm_srli_si128(pred0, 8);
+
+ psrldq xmm5, 8
+
+;;; _mm_storel_epi64((__m128i *)(&mb_rec[1][pos_x]), pred0);
+
+ movq QWORD PTR [ecx+16], xmm5
+
+;;;
+;;; /* --- */
+;;;
+;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[2][pos_x]));
+
+ movq xmm1, QWORD PTR [edx+32]
+
+;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[3][pos_x]));
+;;; // (x + 32) >> 6
+;;; r2 = _mm_adds_epi16(r2, const32);
+
+ movdqa xmm5, XMMWORD PTR [esp]
+ movdqa XMMWORD PTR [esp+32], xmm0 ;
+
+;;; r2 = _mm_srai_epi16(r2, 6);
+;;; r3 = _mm_adds_epi16(r3, const32);
+
+ paddsw xmm6, xmm5
+
+;;; r3 = _mm_srai_epi16(r3, 6);
+
+ psraw xmm6, 6
+
+;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+
+ pxor xmm7, xmm7
+ punpcklbw xmm1, xmm7
+ movdqa xmm0, XMMWORD PTR [esp+16]
+ paddsw xmm0, xmm5
+ psraw xmm0, 6
+
+;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+;;; pred0 = _mm_adds_epi16(pred0, r2);
+
+ paddsw xmm1, xmm0
+
+;;; pred1 = _mm_adds_epi16(pred1, r3);
+;;;
+;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+;;;
+;;; // store
+;;; _mm_storel_epi64((__m128i *)(&mb_rec[2][pos_x]), pred0);
+
+ movdqa xmm0, XMMWORD PTR [esp+32]
+ movq xmm5, QWORD PTR [edx+48]
+ punpcklbw xmm5, xmm7
+ paddsw xmm5, xmm6
+ packuswb xmm1, xmm5
+ movq QWORD PTR [ecx+32], xmm1
+
+;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+;;; pred0 = _mm_srli_si128(pred0, 8);
+
+ psrldq xmm1, 8
+
+;;; _mm_storel_epi64((__m128i *)(&mb_rec[3][pos_x]), pred0);
+
+ movq QWORD PTR [ecx+48], xmm1
+
+;;;
+;;; /* --- */
+;;;
+;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[4][pos_x]));
+
+ movq xmm7, QWORD PTR [edx+64]
+
+;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[5][pos_x]));
+
+ movq xmm6, QWORD PTR [edx+80]
+
+;;; // (x + 32) >> 6
+;;; r4 = _mm_adds_epi16(r4, const32);
+;;; r4 = _mm_srai_epi16(r4, 6);
+;;; r5 = _mm_adds_epi16(r5, const32);
+;;; r5 = _mm_srai_epi16(r5, 6);
+;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+
+ pxor xmm5, xmm5
+ punpcklbw xmm7, xmm5
+
+;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+
+ punpcklbw xmm6, xmm5
+ movdqa xmm1, XMMWORD PTR [esp]
+ paddsw xmm4, xmm1
+ psraw xmm4, 6
+ paddsw xmm3, xmm1
+ psraw xmm3, 6
+
+;;; pred0 = _mm_adds_epi16(pred0, r4);
+
+ paddsw xmm7, xmm4
+
+;;; pred1 = _mm_adds_epi16(pred1, r5);
+
+ paddsw xmm6, xmm3
+
+;;;
+;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+
+ packuswb xmm7, xmm6
+
+;;;
+;;; // store
+;;; _mm_storel_epi64((__m128i *)(&mb_rec[4][pos_x]), pred0);
+
+ movq QWORD PTR [ecx+64], xmm7
+
+;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+;;; pred0 = _mm_srli_si128(pred0, 8);
+
+ psrldq xmm7, 8
+
+;;; _mm_storel_epi64((__m128i *)(&mb_rec[5][pos_x]), pred0);
+
+ movq QWORD PTR [ecx+80], xmm7
+
+
+;;;
+;;; /* --- */
+;;;
+;;; pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[6][pos_x]));
+
+ movq xmm5, QWORD PTR [edx+96]
+
+;;; pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[7][pos_x]));
+
+ movq xmm4, QWORD PTR [edx+112]
+
+;;; // (x + 32) >> 6
+;;; r6 = _mm_adds_epi16(r6, const32);
+;;; r6 = _mm_srai_epi16(r6, 6);
+;;; r7 = _mm_adds_epi16(r7, const32);
+;;; r7 = _mm_srai_epi16(r7, 6);
+;;; pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+
+ pxor xmm3, xmm3
+ punpcklbw xmm5, xmm3
+
+;;; pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+
+ punpcklbw xmm4, xmm3
+ movdqa xmm1, XMMWORD PTR [esp]
+ paddsw xmm2, xmm1
+ psraw xmm2, 6
+ paddsw xmm0, xmm1
+ psraw xmm0, 6
+
+;;; pred0 = _mm_adds_epi16(pred0, r6);
+
+ paddsw xmm5, xmm2
+
+;;; pred1 = _mm_adds_epi16(pred1, r7);
+
+ paddsw xmm4, xmm0
+
+;;;
+;;; pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+
+ packuswb xmm5, xmm4
+
+;;;
+;;; // store
+;;; _mm_storel_epi64((__m128i *)&mb_rec[6][pos_x], pred0);
+
+ movq QWORD PTR [ecx+96], xmm5
+
+;;; // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+;;; pred0 = _mm_srli_si128(pred0, 8);
+
+ psrldq xmm5, 8
+
+;;; _mm_storel_epi64((__m128i *)&mb_rec[7][pos_x], pred0);
+
+ movq QWORD PTR [ecx+112], xmm5
+ mov esp, ebp
+ pop ebp
+ ret
+ ALIGN 2
+_itrans8x8_sse2 ENDP
+
+
+END \ No newline at end of file
diff --git a/Src/h264dec/ldecod/src/quant.c b/Src/h264dec/ldecod/src/quant.c
new file mode 100644
index 00000000..2f01c34a
--- /dev/null
+++ b/Src/h264dec/ldecod/src/quant.c
@@ -0,0 +1,338 @@
+
+/*!
+***********************************************************************
+* \file
+* quant.c
+*
+* \brief
+* Quantization functions
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+*
+***********************************************************************
+*/
+
+#include "contributors.h"
+
+#include "global.h"
+#include "memalloc.h"
+#include "block.h"
+#include "image.h"
+#include "mb_access.h"
+#include "transform.h"
+#include "quant.h"
+
+// Default 4x4 intra scaling list, 16 weights. CalculateQuant4x4Param() indexes
+// it as (i<<2)+j, i.e. row-major. assign_quant_params() selects it for 4x4
+// lists 0..2 when a default is requested or fall-back applies.
+// NOTE(review): values presumably mirror the H.264 Default_4x4_Intra list - confirm against the spec.
+int quant_intra_default[16] = {
+ 6,13,20,28,
+ 13,20,28,32,
+ 20,28,32,37,
+ 28,32,37,42
+};
+
+// Default 4x4 inter scaling list, same layout; selected for 4x4 lists 3..5.
+int quant_inter_default[16] = {
+ 10,14,20,24,
+ 14,20,24,27,
+ 20,24,27,30,
+ 24,27,30,34
+};
+
+// Default 8x8 intra scaling list, 64 weights indexed as (i<<3)+j (row-major);
+// selected by assign_quant_params() for 8x8 lists 6, 8 and 10.
+int quant8_intra_default[64] = {
+ 6,10,13,16,18,23,25,27,
+ 10,11,16,18,23,25,27,29,
+ 13,16,18,23,25,27,29,31,
+ 16,18,23,25,27,29,31,33,
+ 18,23,25,27,29,31,33,36,
+ 23,25,27,29,31,33,36,38,
+ 25,27,29,31,33,36,38,40,
+ 27,29,31,33,36,38,40,42
+};
+
+// Default 8x8 inter scaling list, same layout; selected for 8x8 lists 7, 9 and 11.
+int quant8_inter_default[64] = {
+ 9,13,15,17,19,21,22,24,
+ 13,13,17,19,21,22,24,25,
+ 15,17,19,21,22,24,25,27,
+ 17,19,21,22,24,25,27,28,
+ 19,21,22,24,25,27,28,30,
+ 21,22,24,25,27,28,30,32,
+ 22,24,25,27,28,30,32,33,
+ 24,25,27,28,30,32,33,35
+};
+
+// Flat 4x4 list (every weight 16): assign_quant_params() installs it for
+// lists 0..5 when neither the SPS nor the PPS carries a scaling matrix.
+int quant_org[16] = { //to be used if no q matrix is chosen
+ 16,16,16,16,
+ 16,16,16,16,
+ 16,16,16,16,
+ 16,16,16,16
+};
+
+// Flat 8x8 list (every weight 16): installed for lists 6..11 in the same case.
+int quant8_org[64] = { //to be used if no q matrix is chosen
+ 16,16,16,16,16,16,16,16,
+ 16,16,16,16,16,16,16,16,
+ 16,16,16,16,16,16,16,16,
+ 16,16,16,16,16,16,16,16,
+ 16,16,16,16,16,16,16,16,
+ 16,16,16,16,16,16,16,16,
+ 16,16,16,16,16,16,16,16,
+ 16,16,16,16,16,16,16,16
+};
+
+// Forward declaration: the function is defined further down in this file but
+// is called from assign_quant_params() before its definition.
+static void CalculateQuant8x8Param(Slice *currslice);
+
+/*!
+***********************************************************************
+* \brief
+*    Build the QP lookup tables: entry i of qp_per_matrix holds i / 6 and
+*    entry i of qp_rem_matrix holds i % 6.
+*
+* \param p_Vid
+*    Video parameters owning both tables. A table is only allocated when
+*    its pointer is still NULL; an existing table is refilled in place.
+***********************************************************************
+*/
+void init_qp_process(VideoParameters *p_Vid)
+{
+  int bitdepth_qp_scale = imax(p_Vid->bitdepth_luma_qp_scale,p_Vid->bitdepth_chroma_qp_scale);
+  // one entry per QP in [0, MAX_QP + bitdepth_qp_scale]
+  int table_len = MAX_QP + 1 + bitdepth_qp_scale;
+  int qp;
+
+  // Allocation would ideally live outside this routine, since an SPS change may
+  // require recreating the tables. For now only an unchanged bit depth is
+  // supported: tables that already exist keep their original size.
+  if (p_Vid->qp_per_matrix == NULL)
+  {
+    p_Vid->qp_per_matrix = (int*)malloc(table_len*sizeof(int));
+    if (p_Vid->qp_per_matrix == NULL)
+      no_mem_exit("init_qp_process: p_Vid->qp_per_matrix");
+  }
+
+  if (p_Vid->qp_rem_matrix == NULL)
+  {
+    p_Vid->qp_rem_matrix = (int*)malloc(table_len*sizeof(int));
+    if (p_Vid->qp_rem_matrix == NULL)
+      no_mem_exit("init_qp_process: p_Vid->qp_rem_matrix");
+  }
+
+  for (qp = 0; qp < table_len; ++qp)
+  {
+    p_Vid->qp_per_matrix[qp] = qp / 6;
+    p_Vid->qp_rem_matrix[qp] = qp % 6;
+  }
+}
+
+/*!
+***********************************************************************
+* \brief
+*    Release the QP lookup tables created by init_qp_process() and reset
+*    both pointers to NULL so they can be allocated again later.
+***********************************************************************
+*/
+void free_qp_matrices(VideoParameters *p_Vid)
+{
+  // free(NULL) is a no-op, so no explicit NULL guards are needed
+  free(p_Vid->qp_per_matrix);
+  p_Vid->qp_per_matrix = NULL;
+
+  free(p_Vid->qp_rem_matrix);
+  p_Vid->qp_rem_matrix = NULL;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Select the active scaling list for each of the 12 qmatrix slots
+*    (0..5 are 4x4 lists, 6..11 are 8x8 lists) from the slice's active
+*    SPS and PPS, then derive the inverse level scale tables.
+*
+* \param currSlice
+*    Slice pointer; its active_sps / active_pps are read and its
+*    qmatrix[] and InvLevelScale tables are written.
+*
+************************************************************************
+*/
+void assign_quant_params(Slice *currSlice)
+{
+  seq_parameter_set_rbsp_t *sps = currSlice->active_sps;
+  pic_parameter_set_rbsp_t *pps = currSlice->active_pps;
+  int list;
+
+  if (!pps->pic_scaling_matrix_present_flag && !sps->seq_scaling_matrix_present_flag)
+  {
+    // nothing signalled anywhere: flat lists for every slot
+    for (list = 0; list < 6; list++)
+      currSlice->qmatrix[list] = quant_org;
+    for (list = 6; list < 12; list++)
+      currSlice->qmatrix[list] = quant8_org;
+  }
+  else
+  {
+    // only 4:4:4 carries the four extra 8x8 lists
+    int num_lists = (sps->chroma_format_idc != YUV444) ? 8 : 12;
+
+    if (sps->seq_scaling_matrix_present_flag) // sequence level first
+    {
+      for (list = 0; list < num_lists; list++)
+      {
+        if (sps->seq_scaling_list_present_flag[list])
+        {
+          if (list < 6)
+          {
+            if (sps->UseDefaultScalingMatrix4x4Flag[list])
+              currSlice->qmatrix[list] = (list < 3) ? quant_intra_default : quant_inter_default;
+            else
+              currSlice->qmatrix[list] = sps->ScalingList4x4[list];
+          }
+          else
+          {
+            // within 6..11 the even slots are intra, the odd ones inter
+            if (sps->UseDefaultScalingMatrix8x8Flag[list - 6])
+              currSlice->qmatrix[list] = (list & 1) ? quant8_inter_default : quant8_intra_default;
+            else
+              currSlice->qmatrix[list] = sps->ScalingList8x8[list - 6];
+          }
+        }
+        else // fall-back rule A
+        {
+          switch (list)
+          {
+          case 0:
+            currSlice->qmatrix[list] = quant_intra_default;
+            break;
+          case 3:
+            currSlice->qmatrix[list] = quant_inter_default;
+            break;
+          case 6:
+            currSlice->qmatrix[list] = quant8_intra_default;
+            break;
+          case 7:
+            currSlice->qmatrix[list] = quant8_inter_default;
+            break;
+          default:
+            // inherit from the previous list of the same kind:
+            // one slot back for 4x4, two slots back for 8x8
+            currSlice->qmatrix[list] = currSlice->qmatrix[list - ((list < 6) ? 1 : 2)];
+            break;
+          }
+        }
+      }
+    }
+
+    if (pps->pic_scaling_matrix_present_flag) // picture level overrides
+    {
+      for (list = 0; list < num_lists; list++)
+      {
+        if (pps->pic_scaling_list_present_flag[list])
+        {
+          if (list < 6)
+          {
+            if (pps->UseDefaultScalingMatrix4x4Flag[list])
+              currSlice->qmatrix[list] = (list < 3) ? quant_intra_default : quant_inter_default;
+            else
+              currSlice->qmatrix[list] = pps->ScalingList4x4[list];
+          }
+          else
+          {
+            if (pps->UseDefaultScalingMatrix8x8Flag[list - 6])
+              currSlice->qmatrix[list] = (list & 1) ? quant8_inter_default : quant8_intra_default;
+            else
+              currSlice->qmatrix[list] = pps->ScalingList8x8[list - 6];
+          }
+        }
+        else // fall-back rule B
+        {
+          // The first list of each kind keeps the sequence-level choice made
+          // above and only drops to the default when the SPS had no matrix.
+          switch (list)
+          {
+          case 0:
+            if (!sps->seq_scaling_matrix_present_flag)
+              currSlice->qmatrix[list] = quant_intra_default;
+            break;
+          case 3:
+            if (!sps->seq_scaling_matrix_present_flag)
+              currSlice->qmatrix[list] = quant_inter_default;
+            break;
+          case 6:
+            if (!sps->seq_scaling_matrix_present_flag)
+              currSlice->qmatrix[list] = quant8_intra_default;
+            break;
+          case 7:
+            if (!sps->seq_scaling_matrix_present_flag)
+              currSlice->qmatrix[list] = quant8_inter_default;
+            break;
+          default:
+            currSlice->qmatrix[list] = currSlice->qmatrix[list - ((list < 6) ? 1 : 2)];
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  CalculateQuant4x4Param(currSlice);
+  if (pps->transform_8x8_mode_flag)
+    CalculateQuant8x8Param(currSlice);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Fill the 4x4 inverse level scale tables of a slice: each entry is
+*    dequant_coef[qp_rem][row][col] multiplied by the matching weight of
+*    the selected scaling list (qmatrix 0..2 intra, 3..5 inter).
+*
+************************************************************************
+*/
+void CalculateQuant4x4Param(Slice *currSlice)
+{
+  int qp_rem, row, col, plane, pos;
+
+  for (qp_rem = 0; qp_rem < 6; qp_rem++)
+  {
+    for (row = 0; row < 4; row++)
+    {
+      for (col = 0; col < 4; col++)
+      {
+        pos = (row << 2) + col; // row-major index into the scaling list
+
+        for (plane = 0; plane < 3; plane++)
+        {
+          currSlice->InvLevelScale4x4_Intra[plane][qp_rem][row][col] = dequant_coef[qp_rem][row][col] * currSlice->qmatrix[plane][pos];
+          currSlice->InvLevelScale4x4_Inter[plane][qp_rem][row][col] = dequant_coef[qp_rem][row][col] * currSlice->qmatrix[plane + 3][pos];
+        }
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Fill the 8x8 inverse level scale tables of a slice from dequant_coef8
+*    and the selected 8x8 scaling lists. Plane 0 (qmatrix 6 and 7) is
+*    always computed; planes 1 and 2 (qmatrix 8..11) only for 4:4:4.
+*
+************************************************************************
+*/
+static void CalculateQuant8x8Param(Slice *currSlice)
+{
+  VideoParameters *p_Vid = currSlice->p_Vid;
+  int qp_rem, pos;
+
+  // The 8x8 block is walked in row-major order, so a single index 0..63
+  // addresses both the dequant table and the scaling list.
+  for (qp_rem = 0; qp_rem < 6; qp_rem++)
+  {
+    for (pos = 0; pos < 64; pos++)
+    {
+      currSlice->InvLevelScale8x8_Intra[0][qp_rem][pos] = dequant_coef8[qp_rem][pos] * currSlice->qmatrix[6][pos];
+      currSlice->InvLevelScale8x8_Inter[0][qp_rem][pos] = dequant_coef8[qp_rem][pos] * currSlice->qmatrix[7][pos];
+    }
+  }
+
+  if (p_Vid->active_sps->chroma_format_idc != YUV444)
+    return; // only 4:4:4 has separate lists for the other two planes
+
+  for (qp_rem = 0; qp_rem < 6; qp_rem++)
+  {
+    for (pos = 0; pos < 64; pos++)
+    {
+      currSlice->InvLevelScale8x8_Intra[1][qp_rem][pos] = dequant_coef8[qp_rem][pos] * currSlice->qmatrix[8][pos];
+      currSlice->InvLevelScale8x8_Inter[1][qp_rem][pos] = dequant_coef8[qp_rem][pos] * currSlice->qmatrix[9][pos];
+      currSlice->InvLevelScale8x8_Intra[2][qp_rem][pos] = dequant_coef8[qp_rem][pos] * currSlice->qmatrix[10][pos];
+      currSlice->InvLevelScale8x8_Inter[2][qp_rem][pos] = dequant_coef8[qp_rem][pos] * currSlice->qmatrix[11][pos];
+    }
+  }
+}
diff --git a/Src/h264dec/ldecod/src/sei.c b/Src/h264dec/ldecod/src/sei.c
new file mode 100644
index 00000000..faa4f8ec
--- /dev/null
+++ b/Src/h264dec/ldecod/src/sei.c
@@ -0,0 +1,2132 @@
+/*!
+ ************************************************************************
+ * \file sei.c
+ *
+ * \brief
+ * Functions to implement SEI messages
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Dong Tian <tian@cs.tut.fi>
+ * - Karsten Suehring <suehring@hhi.de>
+ ************************************************************************
+ */
+
+#include "contributors.h"
+
+#include <math.h>
+#include "global.h"
+#include "memalloc.h"
+#include "sei.h"
+#include "vlc.h"
+#include "header.h"
+#include "mbuffer.h"
+#include "parset.h"
+
+
+// #define PRINT_BUFFERING_PERIOD_INFO // uncomment to print buffering period SEI info
+// #define PRINT_PCITURE_TIMING_INFO // uncomment to print picture timing SEI info
+// #define WRITE_MAP_IMAGE // uncomment to write spare picture map
+// #define PRINT_SUBSEQUENCE_INFO // uncomment to print sub-sequence SEI info
+// #define PRINT_SUBSEQUENCE_LAYER_CHAR // uncomment to print sub-sequence layer characteristics SEI info
+// #define PRINT_SUBSEQUENCE_CHAR // uncomment to print sub-sequence characteristics SEI info
+// #define PRINT_SCENE_INFORMATION // uncomment to print scene information SEI info
+// #define PRINT_PAN_SCAN_RECT // uncomment to print pan-scan rectangle SEI info
+// #define PRINT_RECOVERY_POINT // uncomment to print random access point SEI info
+// #define PRINT_FILLER_PAYLOAD_INFO // uncomment to print filler payload SEI info
+// #define PRINT_DEC_REF_PIC_MARKING // uncomment to print decoded picture buffer management repetition SEI info
+// #define PRINT_RESERVED_INFO // uncomment to print reserved SEI info
+// #define PRINT_USER_DATA_UNREGISTERED_INFO // uncomment to print unregistered user data SEI info
+// #define PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO // uncomment to print ITU-T T.35 user data SEI info
+// #define PRINT_FULL_FRAME_FREEZE_INFO // uncomment to print full-frame freeze SEI info
+// #define PRINT_FULL_FRAME_FREEZE_RELEASE_INFO // uncomment to print full-frame freeze release SEI info
+// #define PRINT_FULL_FRAME_SNAPSHOT_INFO // uncomment to print full-frame snapshot SEI info
+// #define PRINT_PROGRESSIVE_REFINEMENT_END_INFO // uncomment to print Progressive refinement segment start SEI info
+// #define PRINT_PROGRESSIVE_REFINEMENT_END_INFO // uncomment to print Progressive refinement segment end SEI info
+// #define PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO // uncomment to print Motion-constrained slice group set SEI info
+// #define PRINT_FILM_GRAIN_CHARACTERISTICS_INFO // uncomment to print Film grain characteristics SEI info
+// #define PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO // uncomment to print deblocking filter display preference SEI info
+// #define PRINT_STEREO_VIDEO_INFO_INFO // uncomment to print stereo video SEI info
+// #define PRINT_TONE_MAPPING // uncomment to print tone-mapping SEI info
+// #define PRINT_POST_FILTER_HINT_INFO // uncomment to print post-filter hint SEI info
+/*!
+ ************************************************************************
+ *  \brief
+ *     Interpret the SEI rbsp: walk every sei_message() in the buffer,
+ *     decode its payload type and size, and dispatch the payload to the
+ *     matching interpret_*() routine.
+ *  \param msg
+ *     a pointer that point to the sei message. Parsing starts at msg[1].
+ *  \param size
+ *     the size of the sei message
+ *  \param p_Vid
+ *     the image pointer
+ *
+ *  \note
+ *     The buffer is trusted: offsets are not checked against \a size while
+ *     parsing, only asserted once the trailing-bits byte (0x80) is reached.
+ ************************************************************************
+ */
+void InterpretSEIMessage(byte* msg, int size, VideoParameters *p_Vid)
+{
+  int payload_type = 0;
+  int payload_size = 0;
+  int offset = 1;
+  byte tmp_byte;
+
+  do
+  {
+    // sei_message();
+    // payload type: each 0xFF byte adds 255, the first other byte ends the field
+    payload_type = 0;
+    tmp_byte = msg[offset++];
+    while (tmp_byte == 0xFF)
+    {
+      payload_type += 255;
+      tmp_byte = msg[offset++];
+    }
+    payload_type += tmp_byte;   // this is the last byte
+
+    // payload size uses the same encoding
+    payload_size = 0;
+    tmp_byte = msg[offset++];
+    while (tmp_byte == 0xFF)
+    {
+      payload_size += 255;
+      tmp_byte = msg[offset++];
+    }
+    payload_size += tmp_byte;   // this is the last byte
+
+    switch ( payload_type )     // sei_payload( type, size );
+    {
+    case  SEI_BUFFERING_PERIOD:
+      interpret_buffering_period_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_PIC_TIMING:
+      interpret_picture_timing_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_PAN_SCAN_RECT:
+      interpret_pan_scan_rect_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_FILLER_PAYLOAD:
+      interpret_filler_payload_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_USER_DATA_REGISTERED_ITU_T_T35:
+      interpret_user_data_registered_itu_t_t35_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_USER_DATA_UNREGISTERED:
+      interpret_user_data_unregistered_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_RECOVERY_POINT:
+      interpret_recovery_point_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_DEC_REF_PIC_MARKING_REPETITION:
+      interpret_dec_ref_pic_marking_repetition_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_SPARE_PIC:
+      interpret_spare_pic( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_SCENE_INFO:
+      interpret_scene_information( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_SUB_SEQ_INFO:
+      interpret_subsequence_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_SUB_SEQ_LAYER_CHARACTERISTICS:
+      interpret_subsequence_layer_characteristics_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_SUB_SEQ_CHARACTERISTICS:
+      interpret_subsequence_characteristics_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_FULL_FRAME_FREEZE:
+      interpret_full_frame_freeze_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_FULL_FRAME_FREEZE_RELEASE:
+      interpret_full_frame_freeze_release_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_FULL_FRAME_SNAPSHOT:
+      interpret_full_frame_snapshot_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START:
+      interpret_progressive_refinement_start_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END:
+      interpret_progressive_refinement_end_info( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET:
+      interpret_motion_constrained_slice_group_set_info( msg+offset, payload_size, p_Vid );
+      break;  // was missing: the payload was additionally parsed as film grain characteristics
+    case  SEI_FILM_GRAIN_CHARACTERISTICS:
+      interpret_film_grain_characteristics_info ( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_DEBLOCKING_FILTER_DISPLAY_PREFERENCE:
+      interpret_deblocking_filter_display_preference_info ( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_STEREO_VIDEO_INFO:
+      interpret_stereo_video_info_info ( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_TONE_MAPPING:
+      interpret_tone_mapping( msg+offset, payload_size, p_Vid );
+      break;
+    case  SEI_POST_FILTER_HINTS:
+      interpret_post_filter_hints_info ( msg+offset, payload_size, p_Vid );
+      break;  // was missing: the payload was additionally handed to the reserved handler
+    default:
+      interpret_reserved_info( msg+offset, payload_size, p_Vid );
+      break;
+    }
+    // skip the payload regardless of how much the handler consumed
+    offset += payload_size;
+
+  } while( msg[offset] != 0x80 );    // more_rbsp_data()  msg[offset] != 0x80
+  // ignore the trailing bits rbsp_trailing_bits();
+  assert(msg[offset] == 0x80);      // this is the trailing bits
+  assert( offset+1 == size );
+}
+
+
+/*!
+************************************************************************
+*  \brief
+*     Interpret the spare picture SEI message: read target_frame_num and,
+*     for every spare picture, its frame number delta and a per-macroblock
+*     map (one byte per 16x16 block). The map is built in a temporary
+*     buffer that is released before returning; it is only written to
+*     disk when WRITE_MAP_IMAGE is defined.
+*  \param payload
+*     a pointer that point to the sei payload
+*  \param size
+*     the size of the sei message
+*  \param p_Vid
+*     the image pointer
+*
+************************************************************************
+*/
+void interpret_spare_pic( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int i,x,y;
+  Bitstream* buf;
+  int bit0, bit1, bitc, no_bit0;
+  int target_frame_num = 0;
+  int num_spare_pics;
+  int delta_spare_frame_num, CandidateSpareFrameNum, SpareFrameNum = 0;
+  int ref_area_indicator;
+
+  int m, n, left, right, top, bottom,directx, directy;
+  byte ***map;
+
+#ifdef WRITE_MAP_IMAGE
+  int symbol_size_in_bytes = p_Vid->pic_unit_bitsize_on_disk/8;
+  int  j, k, i0, j0, tmp, kk;
+  char filename[20] = "map_dec.yuv";
+  FILE *fp;
+  imgpel** Y;
+  static int old_pn=-1;
+  static int first = 1;
+
+  printf("Spare picture SEI message\n");
+#endif
+
+
+
+  assert( payload!=NULL);
+  assert( p_Vid!=NULL);
+
+  // temporary bitstream wrapper around the payload bytes
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    no_mem_exit("interpret_spare_pic: buf");
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  target_frame_num = ue_v("SEI: target_frame_num", buf);
+
+#ifdef WRITE_MAP_IMAGE
+  printf( "target_frame_num is %d\n", target_frame_num );
+#endif
+
+  num_spare_pics = 1 + ue_v("SEI: num_spare_pics_minus1", buf);
+
+#ifdef WRITE_MAP_IMAGE
+  printf( "num_spare_pics is %d\n", num_spare_pics );
+#endif
+
+  // one map per spare picture, one entry per macroblock
+  get_mem3D(&map, num_spare_pics, p_Vid->height >> 4, p_Vid->width >> 4);
+
+  for (i=0; i<num_spare_pics; i++)
+  {
+    // frame numbers are coded as deltas: relative to the target frame for the
+    // first spare picture, relative to the previous spare picture afterwards
+    if (i==0)
+    {
+      CandidateSpareFrameNum = target_frame_num - 1;
+      if ( CandidateSpareFrameNum < 0 ) CandidateSpareFrameNum = MAX_FN - 1;
+    }
+    else
+      CandidateSpareFrameNum = SpareFrameNum;
+
+    delta_spare_frame_num = ue_v("SEI: delta_spare_frame_num", buf);
+
+    SpareFrameNum = CandidateSpareFrameNum - delta_spare_frame_num;
+    if( SpareFrameNum < 0 )
+      SpareFrameNum = MAX_FN + SpareFrameNum;
+
+    ref_area_indicator = ue_v("SEI: ref_area_indicator", buf);
+
+    switch ( ref_area_indicator )
+    {
+    case 0:   // The whole frame can serve as spare picture
+      for (y=0; y<p_Vid->height >> 4; y++)
+        for (x=0; x<p_Vid->width >> 4; x++)
+          map[i][y][x] = 0;
+      break;
+    case 1:   // The map is not compressed
+      for (y=0; y<p_Vid->height >> 4; y++)
+        for (x=0; x<p_Vid->width >> 4; x++)
+        {
+          map[i][y][x] = (byte) u_1("SEI: ref_mb_indicator", buf);
+        }
+      break;
+    case 2:   // The map is compressed
+      //!KS: could not check this function, description is unclear (as stated in Ed. Note)
+      bit0 = 0;
+      bit1 = 1;
+      bitc = bit0;
+      no_bit0 = -1;
+
+      // start at the centre macroblock and walk outwards; left/right/top/bottom
+      // track the rectangle visited so far, (directx, directy) the direction
+      x = ( (p_Vid->width >> 4) - 1 ) / 2;
+      y = ( (p_Vid->height >> 4) - 1 ) / 2;
+      left = right = x;
+      top = bottom = y;
+      directx = 0;
+      directy = 1;
+
+      for (m=0; m<p_Vid->height >> 4; m++)
+        for (n=0; n<p_Vid->width >> 4; n++)
+        {
+
+          // a new run length is read whenever the previous one is used up
+          if (no_bit0<0)
+          {
+            no_bit0 = ue_v("SEI: zero_run_length", buf);
+          }
+          if (no_bit0>0)
+            map[i][y][x] = (byte) bit0;
+          else
+            map[i][y][x] = (byte) bit1;
+          no_bit0--;
+
+          // go to the next mb:
+          if ( directx == -1 && directy == 0 )
+          {
+            if (x > left) x--;
+            else if (x == 0)
+            {
+              y = bottom + 1;
+              bottom++;
+              directx = 1;
+              directy = 0;
+            }
+            else if (x == left)
+            {
+              x--;
+              left--;
+              directx = 0;
+              directy = 1;
+            }
+          }
+          else if ( directx == 1 && directy == 0 )
+          {
+            if (x < right) x++;
+            else if (x == (p_Vid->width >> 4) - 1)
+            {
+              y = top - 1;
+              top--;
+              directx = -1;
+              directy = 0;
+            }
+            else if (x == right)
+            {
+              x++;
+              right++;
+              directx = 0;
+              directy = -1;
+            }
+          }
+          else if ( directx == 0 && directy == -1 )
+          {
+            if ( y > top) y--;
+            else if (y == 0)
+            {
+              x = left - 1;
+              left--;
+              directx = 0;
+              directy = 1;
+            }
+            else if (y == top)
+            {
+              y--;
+              top--;
+              directx = -1;
+              directy = 0;
+            }
+          }
+          else if ( directx == 0 && directy == 1 )
+          {
+            if (y < bottom) y++;
+            else if (y == (p_Vid->height >> 4) - 1)
+            {
+              x = right+1;
+              right++;
+              directx = 0;
+              directy = -1;
+            }
+            else if (y == bottom)
+            {
+              y++;
+              bottom++;
+              directx = 1;
+              directy = 0;
+            }
+          }
+
+
+        }
+      break;
+    default:
+      printf( "Wrong ref_area_indicator %d!\n", ref_area_indicator );
+      exit(0);
+      break;
+    }
+
+  } // end of num_spare_pics
+
+#ifdef WRITE_MAP_IMAGE
+  // begin to write map seq
+  if ( old_pn != p_Vid->number )
+  {
+    old_pn = p_Vid->number;
+    get_mem2Dpel(&Y, p_Vid->height, p_Vid->width);
+    // the first call creates the file, later calls append to it
+    if (first)
+    {
+      fp = fopen( filename, "wb" );
+      first = 0;
+    }
+    else
+      fp = fopen( filename, "ab" );
+    assert( fp != NULL );
+    for (kk=0; kk<num_spare_pics; kk++)
+    {
+      // expand each macroblock flag to a 16x16 luma block
+      for (i=0; i < p_Vid->height >> 4; i++)
+        for (j=0; j < p_Vid->width >> 4; j++)
+        {
+          tmp=map[kk][i][j]==0? p_Vid->max_pel_value_comp[0] : 0;
+          for (i0=0; i0<16; i0++)
+            for (j0=0; j0<16; j0++)
+              Y[i*16+i0][j*16+j0]=tmp;
+        }
+
+      // write the map image (to the file opened above; the original wrote
+      // to an unrelated p_out and left fp empty)
+      for (i=0; i < p_Vid->height; i++)
+        for (j=0; j < p_Vid->width; j++)
+          fwrite(&(Y[i][j]), symbol_size_in_bytes, 1, fp);
+
+      // two constant chroma planes at half resolution
+      for (k=0; k < 2; k++)
+        for (i=0; i < p_Vid->height>>1; i++)
+          for (j=0; j < p_Vid->width>>1; j++)
+            fwrite(&(p_Vid->dc_pred_value_comp[1]), symbol_size_in_bytes, 1, fp);
+    }
+    fclose( fp );
+    free_mem2Dpel( Y );
+  }
+  // end of writing map image
+#undef WRITE_MAP_IMAGE
+#endif
+
+  free_mem3D( map );
+
+  free(buf);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Sub-sequence information SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_SUBSEQUENCE_INFO is defined. If the temporary Bitstream
+ *    cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_subsequence_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  Bitstream* buf;
+  int sub_seq_layer_num, sub_seq_id, first_ref_pic_flag, leading_non_ref_pic_flag, last_pic_flag,
+      sub_seq_frame_num_flag, sub_seq_frame_num;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  sub_seq_layer_num = ue_v("SEI: sub_seq_layer_num" , buf);
+  sub_seq_id = ue_v("SEI: sub_seq_id" , buf);
+  first_ref_pic_flag = u_1 ("SEI: first_ref_pic_flag" , buf);
+  leading_non_ref_pic_flag = u_1 ("SEI: leading_non_ref_pic_flag", buf);
+  last_pic_flag = u_1 ("SEI: last_pic_flag" , buf);
+  sub_seq_frame_num_flag = u_1 ("SEI: sub_seq_frame_num_flag" , buf);
+  // sub_seq_frame_num is only present when the flag is set
+  if (sub_seq_frame_num_flag)
+  {
+    sub_seq_frame_num = ue_v("SEI: sub_seq_frame_num" , buf);
+  }
+
+#ifdef PRINT_SUBSEQUENCE_INFO
+  printf("Sub-sequence information SEI message\n");
+  printf("sub_seq_layer_num = %d\n", sub_seq_layer_num );
+  printf("sub_seq_id = %d\n", sub_seq_id);
+  printf("first_ref_pic_flag = %d\n", first_ref_pic_flag);
+  printf("leading_non_ref_pic_flag = %d\n", leading_non_ref_pic_flag);
+  printf("last_pic_flag = %d\n", last_pic_flag);
+  printf("sub_seq_frame_num_flag = %d\n", sub_seq_frame_num_flag);
+  if (sub_seq_frame_num_flag)
+  {
+    printf("sub_seq_frame_num = %d\n", sub_seq_frame_num);
+  }
+#endif
+
+  free(buf);
+#ifdef PRINT_SUBSEQUENCE_INFO
+#undef PRINT_SUBSEQUENCE_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Sub-sequence layer characteristics SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_SUBSEQUENCE_LAYER_CHAR is defined. If the temporary Bitstream
+ *    cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_subsequence_layer_characteristics_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  Bitstream* buf;
+  long num_sub_layers, accurate_statistics_flag, average_bit_rate, average_frame_rate;
+  int i;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  num_sub_layers = 1 + ue_v("SEI: num_sub_layers_minus1", buf);
+
+#ifdef PRINT_SUBSEQUENCE_LAYER_CHAR
+  printf("Sub-sequence layer characteristics SEI message\n");
+  // num_sub_layers is a long, so it must be printed with %ld
+  printf("num_sub_layers_minus1 = %ld\n", num_sub_layers - 1);
+#endif
+
+  // one flag and two 16-bit fields are read per layer
+  for (i=0; i<num_sub_layers; i++)
+  {
+    accurate_statistics_flag = u_1( "SEI: accurate_statistics_flag", buf);
+    average_bit_rate = u_v(16,"SEI: average_bit_rate" , buf);
+    average_frame_rate = u_v(16,"SEI: average_frame_rate" , buf);
+
+#ifdef PRINT_SUBSEQUENCE_LAYER_CHAR
+    printf("layer %d: accurate_statistics_flag = %ld \n", i, accurate_statistics_flag);
+    printf("layer %d: average_bit_rate = %ld \n", i, average_bit_rate);
+    printf("layer %d: average_frame_rate = %ld \n", i, average_frame_rate);
+#endif
+  }
+  free (buf);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Sub-sequence characteristics SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_SUBSEQUENCE_CHAR is defined. If the temporary Bitstream
+ *    cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_subsequence_characteristics_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  Bitstream* buf;
+  int i;
+  int sub_seq_layer_num, sub_seq_id, duration_flag, average_rate_flag, accurate_statistics_flag;
+  unsigned long sub_seq_duration, average_bit_rate, average_frame_rate;
+  int num_referenced_subseqs, ref_sub_seq_layer_num, ref_sub_seq_id, ref_sub_seq_direction;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  sub_seq_layer_num = ue_v("SEI: sub_seq_layer_num", buf);
+  sub_seq_id = ue_v("SEI: sub_seq_id", buf);
+  duration_flag = u_1 ("SEI: duration_flag", buf);
+
+#ifdef PRINT_SUBSEQUENCE_CHAR
+  printf("Sub-sequence characteristics SEI message\n");
+  printf("sub_seq_layer_num = %d\n", sub_seq_layer_num );
+  printf("sub_seq_id = %d\n", sub_seq_id);
+  printf("duration_flag = %d\n", duration_flag);
+#endif
+
+  // the 32-bit duration is only present when duration_flag is set
+  if ( duration_flag )
+  {
+    sub_seq_duration = u_v (32, "SEI: sub_seq_duration", buf);
+#ifdef PRINT_SUBSEQUENCE_CHAR
+    printf("sub_seq_duration = %lu\n", sub_seq_duration);
+#endif
+  }
+
+  average_rate_flag = u_1 ("SEI: average_rate_flag", buf);
+
+#ifdef PRINT_SUBSEQUENCE_CHAR
+  printf("average_rate_flag = %d\n", average_rate_flag);
+#endif
+
+  // the rate statistics are only present when average_rate_flag is set
+  if ( average_rate_flag )
+  {
+    accurate_statistics_flag = u_1 ( "SEI: accurate_statistics_flag", buf);
+    average_bit_rate = u_v (16, "SEI: average_bit_rate", buf);
+    average_frame_rate = u_v (16, "SEI: average_frame_rate", buf);
+
+#ifdef PRINT_SUBSEQUENCE_CHAR
+    printf("accurate_statistics_flag = %d\n", accurate_statistics_flag);
+    // both rates are unsigned long, so they must be printed with %lu
+    printf("average_bit_rate = %lu\n", average_bit_rate);
+    printf("average_frame_rate = %lu\n", average_frame_rate);
+#endif
+  }
+
+  num_referenced_subseqs = ue_v("SEI: num_referenced_subseqs", buf);
+
+#ifdef PRINT_SUBSEQUENCE_CHAR
+  printf("num_referenced_subseqs = %d\n", num_referenced_subseqs);
+#endif
+
+  // three syntax elements are read per referenced sub-sequence
+  for (i=0; i<num_referenced_subseqs; i++)
+  {
+    ref_sub_seq_layer_num = ue_v("SEI: ref_sub_seq_layer_num", buf);
+    ref_sub_seq_id = ue_v("SEI: ref_sub_seq_id", buf);
+    ref_sub_seq_direction = u_1 ("SEI: ref_sub_seq_direction", buf);
+
+#ifdef PRINT_SUBSEQUENCE_CHAR
+    printf("ref_sub_seq_layer_num = %d\n", ref_sub_seq_layer_num);
+    printf("ref_sub_seq_id = %d\n", ref_sub_seq_id);
+    printf("ref_sub_seq_direction = %d\n", ref_sub_seq_direction);
+#endif
+  }
+
+  free( buf );
+#ifdef PRINT_SUBSEQUENCE_CHAR
+#undef PRINT_SUBSEQUENCE_CHAR
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Scene information SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_SCENE_INFORMATION is defined. If the temporary Bitstream
+ *    cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_scene_information( byte* payload, int size, VideoParameters *p_Vid )
+{
+  Bitstream* buf;
+  int scene_id, scene_transition_type, second_scene_id;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  scene_id = ue_v("SEI: scene_id" , buf);
+  scene_transition_type = ue_v("SEI: scene_transition_type", buf);
+  // a second scene id is only read for transition types above 3
+  if ( scene_transition_type > 3 )
+  {
+    second_scene_id = ue_v("SEI: second_scene_id", buf);
+  }
+
+#ifdef PRINT_SCENE_INFORMATION
+  printf("Scene information SEI message\n");
+  printf("scene_transition_type = %d\n", scene_transition_type);
+  printf("scene_id = %d\n", scene_id);
+  if ( scene_transition_type > 3 )
+  {
+    printf("second_scene_id = %d\n", second_scene_id);
+  }
+#endif
+  free( buf );
+#ifdef PRINT_SCENE_INFORMATION
+#undef PRINT_SCENE_INFORMATION
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Filler payload SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    Counts the leading 0xFF bytes of the payload. The result is only
+ *    reported when PRINT_FILLER_PAYLOAD_INFO is defined.
+ ************************************************************************
+ */
+void interpret_filler_payload_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int payload_cnt = 0;
+
+  // Stop at the first byte that is not 0xFF. The counter must not be the
+  // only thing that ends the loop, otherwise a non-filler byte would make
+  // the loop spin forever.
+  while (payload_cnt<size && payload[payload_cnt] == 0xFF)
+  {
+    payload_cnt++;
+  }
+
+
+#ifdef PRINT_FILLER_PAYLOAD_INFO
+  printf("Filler payload SEI message\n");
+  if (payload_cnt==size)
+  {
+    printf("read %d bytes of filler payload\n", payload_cnt);
+  }
+  else
+  {
+    printf("error reading filler payload: not all bytes are 0xFF (%d of %d)\n", payload_cnt, size);
+  }
+#endif
+
+#ifdef PRINT_FILLER_PAYLOAD_INFO
+#undef PRINT_FILLER_PAYLOAD_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the User data unregistered SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The first 16 payload bytes are treated as uuid_iso_11578, the rest
+ *    as user data bytes. Both are only printed when
+ *    PRINT_USER_DATA_UNREGISTERED_INFO is defined. A payload shorter
+ *    than 16 bytes is ignored.
+ ************************************************************************
+ */
+void interpret_user_data_unregistered_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int offset = 0;
+  byte payload_byte;
+
+  // A malformed stream must not abort the decoder (and, with assertions
+  // disabled, must not cause reads beyond the payload), so the length is
+  // checked at run time instead of with assert().
+  if (size < 16)
+    return;
+
+#ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+  printf("User data unregistered SEI message\n");
+  printf("uuid_iso_11578 = 0x");
+#endif
+
+  for (offset = 0; offset < 16; offset++)
+  {
+#ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+    printf("%02x",payload[offset]);
+#endif
+  }
+
+#ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+  printf("\n");
+#endif
+
+  // remaining bytes are the user data itself
+  while (offset < size)
+  {
+    payload_byte = payload[offset];
+    offset ++;
+#ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+    printf("Unreg data payload_byte = %d\n", payload_byte);
+#endif
+  }
+#ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+#undef PRINT_USER_DATA_UNREGISTERED_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the User data registered by ITU-T T.35 SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed bytes are only printed when
+ *    PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO is defined. No byte at or
+ *    beyond index \c size is read.
+ ************************************************************************
+ */
+void interpret_user_data_registered_itu_t_t35_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int offset = 0;
+  byte itu_t_t35_country_code, itu_t_t35_country_code_extension_byte, payload_byte;
+
+  // an empty payload does not even contain the country code
+  if (size < 1)
+    return;
+
+  itu_t_t35_country_code = payload[offset];
+  offset++;
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+  printf("User data registered by ITU-T T.35 SEI message\n");
+  printf(" itu_t_t35_country_code = %d \n", itu_t_t35_country_code);
+#endif
+  // country code 0xFF announces an extension byte; only read it if the
+  // payload is long enough to hold one
+  if(itu_t_t35_country_code == 0xFF && offset < size)
+  {
+    itu_t_t35_country_code_extension_byte = payload[offset];
+    offset++;
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+    printf(" ITU_T_T35_COUNTRY_CODE_EXTENSION_BYTE %d \n", itu_t_t35_country_code_extension_byte);
+#endif
+  }
+  // remaining bytes are the registered user data
+  while (offset < size)
+  {
+    payload_byte = payload[offset];
+    offset ++;
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+    printf("itu_t_t35 payload_byte = %d\n", payload_byte);
+#endif
+  }
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+#undef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Pan scan rectangle SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The rectangle offsets are only printed when PRINT_PAN_SCAN_RECT is
+ *    defined. If the temporary Bitstream cannot be allocated, the
+ *    message is skipped.
+ ************************************************************************
+ */
+void interpret_pan_scan_rect_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int pan_scan_rect_cancel_flag;
+  int pan_scan_cnt_minus1, i;
+  int pan_scan_rect_repetition_period;
+  int pan_scan_rect_id, pan_scan_rect_left_offset, pan_scan_rect_right_offset;
+  int pan_scan_rect_top_offset, pan_scan_rect_bottom_offset;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  pan_scan_rect_id = ue_v("SEI: pan_scan_rect_id", buf);
+
+  pan_scan_rect_cancel_flag = u_1("SEI: pan_scan_rect_cancel_flag", buf);
+  // rectangles and repetition period are only present when not cancelled
+  if (!pan_scan_rect_cancel_flag)
+  {
+    pan_scan_cnt_minus1 = ue_v("SEI: pan_scan_cnt_minus1", buf);
+    for (i = 0; i <= pan_scan_cnt_minus1; i++)
+    {
+      pan_scan_rect_left_offset = se_v("SEI: pan_scan_rect_left_offset" , buf);
+      pan_scan_rect_right_offset = se_v("SEI: pan_scan_rect_right_offset" , buf);
+      pan_scan_rect_top_offset = se_v("SEI: pan_scan_rect_top_offset" , buf);
+      pan_scan_rect_bottom_offset = se_v("SEI: pan_scan_rect_bottom_offset", buf);
+#ifdef PRINT_PAN_SCAN_RECT
+      printf("Pan scan rectangle SEI message %d/%d\n", i, pan_scan_cnt_minus1);
+      printf("pan_scan_rect_id = %d\n", pan_scan_rect_id);
+      printf("pan_scan_rect_left_offset = %d\n", pan_scan_rect_left_offset);
+      printf("pan_scan_rect_right_offset = %d\n", pan_scan_rect_right_offset);
+      printf("pan_scan_rect_top_offset = %d\n", pan_scan_rect_top_offset);
+      printf("pan_scan_rect_bottom_offset = %d\n", pan_scan_rect_bottom_offset);
+#endif
+    }
+    pan_scan_rect_repetition_period = ue_v("SEI: pan_scan_rect_repetition_period", buf);
+  }
+
+  free (buf);
+#ifdef PRINT_PAN_SCAN_RECT
+#undef PRINT_PAN_SCAN_RECT
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Recovery point SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters; recovery_point is set to 1 and
+ *    recovery_frame_cnt receives the parsed frame count
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_RECOVERY_POINT is defined. If the temporary Bitstream cannot
+ *    be allocated, the message is skipped and p_Vid is left untouched.
+ ************************************************************************
+ */
+void interpret_recovery_point_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int recovery_frame_cnt, exact_match_flag, broken_link_flag, changing_slice_group_idc;
+
+
+  Bitstream* buf;
+
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  recovery_frame_cnt = ue_v( "SEI: recovery_frame_cnt" , buf);
+  exact_match_flag = u_1 ( "SEI: exact_match_flag" , buf);
+  broken_link_flag = u_1 ( "SEI: broken_link_flag" , buf);
+  changing_slice_group_idc = u_v ( 2, "SEI: changing_slice_group_idc", buf);
+
+  // record that a recovery point was signalled, together with its frame count
+  p_Vid->recovery_point = 1;
+  p_Vid->recovery_frame_cnt = recovery_frame_cnt;
+
+#ifdef PRINT_RECOVERY_POINT
+  printf("Recovery point SEI message\n");
+  printf("recovery_frame_cnt = %d\n", recovery_frame_cnt);
+  printf("exact_match_flag = %d\n", exact_match_flag);
+  printf("broken_link_flag = %d\n", broken_link_flag);
+  printf("changing_slice_group_idc = %d\n", changing_slice_group_idc);
+#endif
+  free (buf);
+#ifdef PRINT_RECOVERY_POINT
+#undef PRINT_RECOVERY_POINT
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Decoded Picture Buffer Management Repetition SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters; active_sps is read, and the marking related
+ *    fields are temporarily overwritten and restored before returning
+ * \note
+ *    dec_ref_pic_marking() is run on the SEI payload with
+ *    p_Vid->idr_flag set to original_idr_flag. The marking buffer it
+ *    builds is freed again, and the previous values of the p_Vid fields
+ *    saved below are restored. The parsed values are only printed when
+ *    PRINT_DEC_REF_PIC_MARKING is defined. If the temporary Bitstream
+ *    cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_dec_ref_pic_marking_repetition_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int original_idr_flag, original_frame_num;
+  int original_field_pic_flag, original_bottom_field_flag;
+
+  DecRefPicMarking_t *tmp_drpm;
+
+  DecRefPicMarking_t *old_drpm;
+  int old_idr_flag , old_no_output_of_prior_pics_flag, old_long_term_reference_flag , old_adaptive_ref_pic_buffering_flag;
+
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  original_idr_flag = u_1 ( "SEI: original_idr_flag" , buf);
+  original_frame_num = ue_v( "SEI: original_frame_num" , buf);
+
+  // the field flags are only present when the stream is not frame-only
+  if ( !p_Vid->active_sps->frame_mbs_only_flag )
+  {
+    original_field_pic_flag = u_1 ( "SEI: original_field_pic_flag", buf);
+    if ( original_field_pic_flag )
+    {
+      original_bottom_field_flag = u_1 ( "SEI: original_bottom_field_flag", buf);
+    }
+  }
+
+#ifdef PRINT_DEC_REF_PIC_MARKING
+  printf("Decoded Picture Buffer Management Repetition SEI message\n");
+  printf("original_idr_flag = %d\n", original_idr_flag);
+  printf("original_frame_num = %d\n", original_frame_num);
+  // same condition as the parse above, so only values that were read are printed
+  if ( !p_Vid->active_sps->frame_mbs_only_flag )
+  {
+    printf("original_field_pic_flag = %d\n", original_field_pic_flag);
+    if ( original_field_pic_flag )
+    {
+      printf("original_bottom_field_flag = %d\n", original_bottom_field_flag);
+    }
+  }
+#endif
+
+  // we need to save everything that is probably overwritten in dec_ref_pic_marking()
+  old_drpm = p_Vid->dec_ref_pic_marking_buffer;
+  old_idr_flag = p_Vid->idr_flag;
+
+  old_no_output_of_prior_pics_flag = p_Vid->no_output_of_prior_pics_flag;
+  old_long_term_reference_flag = p_Vid->long_term_reference_flag;
+  old_adaptive_ref_pic_buffering_flag = p_Vid->adaptive_ref_pic_buffering_flag;
+
+  // set new initial values
+  p_Vid->idr_flag = original_idr_flag;
+  p_Vid->dec_ref_pic_marking_buffer = NULL;
+
+  dec_ref_pic_marking(p_Vid, buf);
+
+  // print out decoded values
+#ifdef PRINT_DEC_REF_PIC_MARKING
+  if (p_Vid->idr_flag)
+  {
+    printf("no_output_of_prior_pics_flag = %d\n", p_Vid->no_output_of_prior_pics_flag);
+    printf("long_term_reference_flag = %d\n", p_Vid->long_term_reference_flag);
+  }
+  else
+  {
+    printf("adaptive_ref_pic_buffering_flag = %d\n", p_Vid->adaptive_ref_pic_buffering_flag);
+    if (p_Vid->adaptive_ref_pic_buffering_flag)
+    {
+      tmp_drpm=p_Vid->dec_ref_pic_marking_buffer;
+      while (tmp_drpm != NULL)
+      {
+        printf("memory_management_control_operation = %d\n", tmp_drpm->memory_management_control_operation);
+
+        if ((tmp_drpm->memory_management_control_operation==1)||(tmp_drpm->memory_management_control_operation==3))
+        {
+          printf("difference_of_pic_nums_minus1 = %d\n", tmp_drpm->difference_of_pic_nums_minus1);
+        }
+        if (tmp_drpm->memory_management_control_operation==2)
+        {
+          printf("long_term_pic_num = %d\n", tmp_drpm->long_term_pic_num);
+        }
+        if ((tmp_drpm->memory_management_control_operation==3)||(tmp_drpm->memory_management_control_operation==6))
+        {
+          printf("long_term_frame_idx = %d\n", tmp_drpm->long_term_frame_idx);
+        }
+        if (tmp_drpm->memory_management_control_operation==4)
+        {
+          printf("max_long_term_pic_idx_plus1 = %d\n", tmp_drpm->max_long_term_frame_idx_plus1);
+        }
+        tmp_drpm = tmp_drpm->Next;
+      }
+    }
+  }
+#endif
+
+  // release the marking list that was built from the SEI payload
+  while (p_Vid->dec_ref_pic_marking_buffer)
+  {
+    tmp_drpm=p_Vid->dec_ref_pic_marking_buffer;
+
+    p_Vid->dec_ref_pic_marking_buffer=tmp_drpm->Next;
+    free (tmp_drpm);
+  }
+
+  // restore old values in p_Vid
+  p_Vid->dec_ref_pic_marking_buffer = old_drpm;
+  p_Vid->idr_flag = old_idr_flag;
+  p_Vid->no_output_of_prior_pics_flag = old_no_output_of_prior_pics_flag;
+  p_Vid->long_term_reference_flag = old_long_term_reference_flag;
+  p_Vid->adaptive_ref_pic_buffering_flag = old_adaptive_ref_pic_buffering_flag;
+
+  free (buf);
+#ifdef PRINT_DEC_REF_PIC_MARKING
+#undef PRINT_DEC_REF_PIC_MARKING
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Full-frame freeze SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The repetition period is only printed when
+ *    PRINT_FULL_FRAME_FREEZE_INFO is defined. If the temporary Bitstream
+ *    cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_full_frame_freeze_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int full_frame_freeze_repetition_period;
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  full_frame_freeze_repetition_period = ue_v( "SEI: full_frame_freeze_repetition_period" , buf);
+
+#ifdef PRINT_FULL_FRAME_FREEZE_INFO
+  printf("full_frame_freeze_repetition_period = %d\n", full_frame_freeze_repetition_period);
+#endif
+
+  free (buf);
+#ifdef PRINT_FULL_FRAME_FREEZE_INFO
+#undef PRINT_FULL_FRAME_FREEZE_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Full-frame freeze release SEI message
+ * \param payload
+ *    a pointer to the SEI payload (not read by this function)
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    Does nothing unless PRINT_FULL_FRAME_FREEZE_RELEASE_INFO is
+ *    defined, in which case a non-zero payload size is reported.
+ ************************************************************************
+ */
+void interpret_full_frame_freeze_release_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+#ifdef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO
+  printf("Full-frame freeze release SEI message\n");
+  if (size != 0)
+    printf("payload size of this message should be zero, but is %d bytes.\n", size);
+  // the print switch is dropped once this function has been compiled
+#undef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Full-frame snapshot SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The snapshot id is only printed when PRINT_FULL_FRAME_SNAPSHOT_INFO
+ *    is defined. If the temporary Bitstream cannot be allocated, the
+ *    message is skipped.
+ ************************************************************************
+ */
+void interpret_full_frame_snapshot_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int snapshot_id;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  snapshot_id = ue_v("SEI: snapshot_id", buf);
+
+#ifdef PRINT_FULL_FRAME_SNAPSHOT_INFO
+  printf("Full-frame snapshot SEI message\n");
+  printf("snapshot_id = %d\n", snapshot_id);
+#endif
+  free (buf);
+#ifdef PRINT_FULL_FRAME_SNAPSHOT_INFO
+#undef PRINT_FULL_FRAME_SNAPSHOT_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Progressive refinement segment start SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_PROGRESSIVE_REFINEMENT_START_INFO is defined. If the
+ *    temporary Bitstream cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_progressive_refinement_start_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int progressive_refinement_id, num_refinement_steps_minus1;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  progressive_refinement_id = ue_v("SEI: progressive_refinement_id" , buf);
+  num_refinement_steps_minus1 = ue_v("SEI: num_refinement_steps_minus1", buf);
+
+#ifdef PRINT_PROGRESSIVE_REFINEMENT_START_INFO
+  printf("Progressive refinement segment start SEI message\n");
+  printf("progressive_refinement_id = %d\n", progressive_refinement_id);
+  printf("num_refinement_steps_minus1 = %d\n", num_refinement_steps_minus1);
+#endif
+  free (buf);
+#ifdef PRINT_PROGRESSIVE_REFINEMENT_START_INFO
+#undef PRINT_PROGRESSIVE_REFINEMENT_START_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Progressive refinement segment end SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The refinement id is only printed when
+ *    PRINT_PROGRESSIVE_REFINEMENT_END_INFO is defined. If the temporary
+ *    Bitstream cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_progressive_refinement_end_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int progressive_refinement_id;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  progressive_refinement_id = ue_v("SEI: progressive_refinement_id" , buf);
+
+#ifdef PRINT_PROGRESSIVE_REFINEMENT_END_INFO
+  printf("Progressive refinement segment end SEI message\n");
+  printf("progressive_refinement_id = %d\n", progressive_refinement_id);
+#endif
+  free (buf);
+#ifdef PRINT_PROGRESSIVE_REFINEMENT_END_INFO
+#undef PRINT_PROGRESSIVE_REFINEMENT_END_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Motion-constrained slice group set SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO is defined. If the
+ *    temporary Bitstream cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_motion_constrained_slice_group_set_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int num_slice_groups_minus1, slice_group_id, exact_match_flag, pan_scan_rect_flag, pan_scan_rect_id;
+  int i;
+  int sliceGroupSize;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  num_slice_groups_minus1 = ue_v("SEI: num_slice_groups_minus1" , buf);
+  // bit width used below for every slice_group_id
+  sliceGroupSize = CeilLog2( num_slice_groups_minus1 + 1 );
+#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO
+  printf("Motion-constrained slice group set SEI message\n");
+  printf("num_slice_groups_minus1 = %d\n", num_slice_groups_minus1);
+#endif
+
+  for (i=0; i<=num_slice_groups_minus1;i++)
+  {
+
+    slice_group_id = u_v (sliceGroupSize, "SEI: slice_group_id" , buf) ;
+#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO
+    printf("slice_group_id = %d\n", slice_group_id);
+#endif
+  }
+
+  exact_match_flag = u_1("SEI: exact_match_flag" , buf);
+  pan_scan_rect_flag = u_1("SEI: pan_scan_rect_flag" , buf);
+
+#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO
+  printf("exact_match_flag = %d\n", exact_match_flag);
+  printf("pan_scan_rect_flag = %d\n", pan_scan_rect_flag);
+#endif
+
+  // the rectangle id is only present when pan_scan_rect_flag is set
+  if (pan_scan_rect_flag)
+  {
+    pan_scan_rect_id = ue_v("SEI: pan_scan_rect_id" , buf);
+#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO
+    printf("pan_scan_rect_id = %d\n", pan_scan_rect_id);
+#endif
+  }
+
+  free (buf);
+#ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO
+#undef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the film grain characteristics SEI message
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message
+ * \param p_Vid
+ *    the video parameters (not referenced by this function)
+ * \note
+ *    The parsed syntax elements are only printed when
+ *    PRINT_FILM_GRAIN_CHARACTERISTICS_INFO is defined. If the temporary
+ *    Bitstream cannot be allocated, the message is skipped.
+ ************************************************************************
+ */
+void interpret_film_grain_characteristics_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int film_grain_characteristics_cancel_flag;
+  int model_id, separate_colour_description_present_flag;
+  int film_grain_bit_depth_luma_minus8, film_grain_bit_depth_chroma_minus8, film_grain_full_range_flag, film_grain_colour_primaries, film_grain_transfer_characteristics, film_grain_matrix_coefficients;
+  int blending_mode_id, log2_scale_factor, comp_model_present_flag[3];
+  int num_intensity_intervals_minus1, num_model_values_minus1;
+  int intensity_interval_lower_bound, intensity_interval_upper_bound;
+  int comp_model_value;
+  int film_grain_characteristics_repetition_period;
+
+  int c, i, j;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (buf == NULL)
+    return;   // out of memory: skip this message instead of dereferencing NULL
+
+  // wrap the payload in a Bitstream so the syntax element readers can be used
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  film_grain_characteristics_cancel_flag = u_1("SEI: film_grain_characteristics_cancel_flag", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+  printf("film_grain_characteristics_cancel_flag = %d\n", film_grain_characteristics_cancel_flag);
+#endif
+  // nothing else is read when the cancel flag is set
+  if(!film_grain_characteristics_cancel_flag)
+  {
+
+    model_id = u_v(2, "SEI: model_id", buf);
+    separate_colour_description_present_flag = u_1("SEI: separate_colour_description_present_flag", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+    printf("model_id = %d\n", model_id);
+    printf("separate_colour_description_present_flag = %d\n", separate_colour_description_present_flag);
+#endif
+    if (separate_colour_description_present_flag)
+    {
+      film_grain_bit_depth_luma_minus8 = u_v(3, "SEI: film_grain_bit_depth_luma_minus8", buf);
+      film_grain_bit_depth_chroma_minus8 = u_v(3, "SEI: film_grain_bit_depth_chroma_minus8", buf);
+      film_grain_full_range_flag = u_v(1, "SEI: film_grain_full_range_flag", buf);
+      film_grain_colour_primaries = u_v(8, "SEI: film_grain_colour_primaries", buf);
+      film_grain_transfer_characteristics = u_v(8, "SEI: film_grain_transfer_characteristics", buf);
+      film_grain_matrix_coefficients = u_v(8, "SEI: film_grain_matrix_coefficients", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+      printf("film_grain_bit_depth_luma_minus8 = %d\n", film_grain_bit_depth_luma_minus8);
+      printf("film_grain_bit_depth_chroma_minus8 = %d\n", film_grain_bit_depth_chroma_minus8);
+      printf("film_grain_full_range_flag = %d\n", film_grain_full_range_flag);
+      printf("film_grain_colour_primaries = %d\n", film_grain_colour_primaries);
+      printf("film_grain_transfer_characteristics = %d\n", film_grain_transfer_characteristics);
+      printf("film_grain_matrix_coefficients = %d\n", film_grain_matrix_coefficients);
+#endif
+    }
+    blending_mode_id = u_v(2, "SEI: blending_mode_id", buf);
+    log2_scale_factor = u_v(4, "SEI: log2_scale_factor", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+    printf("blending_mode_id = %d\n", blending_mode_id);
+    printf("log2_scale_factor = %d\n", log2_scale_factor);
+#endif
+    // first all three presence flags, then the model data of each present component
+    for (c = 0; c < 3; c ++)
+    {
+      comp_model_present_flag[c] = u_1("SEI: comp_model_present_flag", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+      printf("comp_model_present_flag = %d\n", comp_model_present_flag[c]);
+#endif
+    }
+    for (c = 0; c < 3; c ++)
+      if (comp_model_present_flag[c])
+      {
+        num_intensity_intervals_minus1 = u_v(8, "SEI: num_intensity_intervals_minus1", buf);
+        num_model_values_minus1 = u_v(3, "SEI: num_model_values_minus1", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+        printf("num_intensity_intervals_minus1 = %d\n", num_intensity_intervals_minus1);
+        printf("num_model_values_minus1 = %d\n", num_model_values_minus1);
+#endif
+        for (i = 0; i <= num_intensity_intervals_minus1; i ++)
+        {
+          intensity_interval_lower_bound = u_v(8, "SEI: intensity_interval_lower_bound", buf);
+          intensity_interval_upper_bound = u_v(8, "SEI: intensity_interval_upper_bound", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+          printf("intensity_interval_lower_bound = %d\n", intensity_interval_lower_bound);
+          printf("intensity_interval_upper_bound = %d\n", intensity_interval_upper_bound);
+#endif
+          for (j = 0; j <= num_model_values_minus1; j++)
+          {
+            comp_model_value = se_v("SEI: comp_model_value", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+            printf("comp_model_value = %d\n", comp_model_value);
+#endif
+          }
+        }
+      }
+    film_grain_characteristics_repetition_period = ue_v("SEI: film_grain_characteristics_repetition_period", buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+    printf("film_grain_characteristics_repetition_period = %d\n", film_grain_characteristics_repetition_period);
+#endif
+  }
+
+  free (buf);
+#ifdef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+#undef PRINT_FILM_GRAIN_CHARACTERISTICS_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the deblocking filter display preference SEI message.
+ *    The syntax elements are parsed and, when
+ *    PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO is defined, printed;
+ *    nothing is stored.
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message (used as the bitstream length)
+ * \param p_Vid
+ *    the image pointer (not referenced by this function)
+ *
+ ************************************************************************
+ */
+void interpret_deblocking_filter_display_preference_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int deblocking_display_preference_cancel_flag;
+  int display_prior_to_deblocking_preferred_flag, dec_frame_buffering_constraint_flag, deblocking_display_preference_repetition_period;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (NULL == buf)
+    return; // the message is informational only: skip it instead of dereferencing NULL
+
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  deblocking_display_preference_cancel_flag = u_1("SEI: deblocking_display_preference_cancel_flag", buf);
+#ifdef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO
+  printf("deblocking_display_preference_cancel_flag = %d\n", deblocking_display_preference_cancel_flag);
+#endif
+  // the remaining syntax elements are only coded when the message is not a cancel request
+  if(!deblocking_display_preference_cancel_flag)
+  {
+    display_prior_to_deblocking_preferred_flag = u_1("SEI: display_prior_to_deblocking_preferred_flag", buf);
+    dec_frame_buffering_constraint_flag = u_1("SEI: dec_frame_buffering_constraint_flag", buf);
+    deblocking_display_preference_repetition_period = ue_v("SEI: deblocking_display_preference_repetition_period", buf);
+#ifdef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO
+    printf("display_prior_to_deblocking_preferred_flag = %d\n", display_prior_to_deblocking_preferred_flag);
+    printf("dec_frame_buffering_constraint_flag = %d\n", dec_frame_buffering_constraint_flag);
+    printf("deblocking_display_preference_repetition_period = %d\n", deblocking_display_preference_repetition_period);
+#endif
+  }
+
+  free (buf);
+#ifdef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO
+#undef PRINT_DEBLOCKING_FILTER_DISPLAY_PREFERENCE_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the stereo video info SEI message.
+ *    The syntax elements are parsed and, when PRINT_STEREO_VIDEO_INFO_INFO
+ *    is defined, printed; nothing is stored.
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message (used as the bitstream length)
+ * \param p_Vid
+ *    the image pointer (not referenced by this function)
+ *
+ ************************************************************************
+ */
+void interpret_stereo_video_info_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int field_views_flags;
+  int top_field_is_left_view_flag, current_frame_is_left_view_flag, next_frame_is_second_view_flag;
+  int left_view_self_contained_flag;
+  int right_view_self_contained_flag;
+
+  Bitstream* buf;
+
+  buf = malloc(sizeof(Bitstream));
+  if (NULL == buf)
+    return; // the message is informational only: skip it instead of dereferencing NULL
+
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  field_views_flags = u_1("SEI: field_views_flags", buf);
+#ifdef PRINT_STEREO_VIDEO_INFO_INFO
+  printf("field_views_flags = %d\n", field_views_flags);
+#endif
+  // field_views_flags selects which of the two flag sets follows in the payload
+  if (field_views_flags)
+  {
+    top_field_is_left_view_flag = u_1("SEI: top_field_is_left_view_flag", buf);
+#ifdef PRINT_STEREO_VIDEO_INFO_INFO
+    printf("top_field_is_left_view_flag = %d\n", top_field_is_left_view_flag);
+#endif
+  }
+  else
+  {
+    current_frame_is_left_view_flag = u_1("SEI: current_frame_is_left_view_flag", buf);
+    next_frame_is_second_view_flag = u_1("SEI: next_frame_is_second_view_flag", buf);
+#ifdef PRINT_STEREO_VIDEO_INFO_INFO
+    printf("current_frame_is_left_view_flag = %d\n", current_frame_is_left_view_flag);
+    printf("next_frame_is_second_view_flag = %d\n", next_frame_is_second_view_flag);
+#endif
+  }
+
+  left_view_self_contained_flag = u_1("SEI: left_view_self_contained_flag", buf);
+  right_view_self_contained_flag = u_1("SEI: right_view_self_contained_flag", buf);
+#ifdef PRINT_STEREO_VIDEO_INFO_INFO
+  printf("left_view_self_contained_flag = %d\n", left_view_self_contained_flag);
+  printf("right_view_self_contained_flag = %d\n", right_view_self_contained_flag);
+#endif
+
+  free (buf);
+#ifdef PRINT_STEREO_VIDEO_INFO_INFO
+#undef PRINT_STEREO_VIDEO_INFO_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret a reserved SEI message: every payload byte is read and,
+ *    when PRINT_RESERVED_INFO is defined, printed.
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message (number of payload bytes visited)
+ * \param p_Vid
+ *    the image pointer (not referenced by this function)
+ *
+ ************************************************************************
+ */
+void interpret_reserved_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int pos;
+  byte payload_byte;
+
+#ifdef PRINT_RESERVED_INFO
+  printf("Reserved SEI message\n");
+#endif
+
+  // walk the payload byte by byte
+  for (pos = 0; pos < size; pos++)
+  {
+    payload_byte = payload[pos];
+#ifdef PRINT_RESERVED_INFO
+    printf("reserved_sei_message_payload_byte = %d\n", payload_byte);
+#endif
+  }
+#ifdef PRINT_RESERVED_INFO
+#undef PRINT_RESERVED_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Buffering period SEI message.
+ *    Activates the SPS referenced by the message and parses the initial
+ *    CPB removal delays for the NAL and VCL HRD (printed only when
+ *    PRINT_BUFFERING_PERIOD_INFO is defined).
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message (used as the bitstream length)
+ * \param p_Vid
+ *    the image pointer; its SeqParSet table is indexed by the parsed id
+ *
+ ************************************************************************
+ */
+void interpret_buffering_period_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  int seq_parameter_set_id, initial_cpb_removal_delay, initial_cpb_removal_delay_offset;
+  unsigned int k;
+
+  Bitstream* buf;
+  seq_parameter_set_rbsp_t *sps;
+
+
+  buf = malloc(sizeof(Bitstream));
+  if (NULL == buf)
+    return; // out of memory: skip the message instead of dereferencing NULL
+
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  seq_parameter_set_id = ue_v("SEI: seq_parameter_set_id" , buf);
+
+  // The id comes straight from the bitstream; reject values outside the SPS table
+  // so a corrupt stream cannot make us read (and activate) memory past the table.
+  // NOTE(review): the bound assumes p_Vid->SeqParSet is an array member, not a pointer - confirm.
+  if (seq_parameter_set_id < 0 ||
+      (size_t) seq_parameter_set_id >= sizeof(p_Vid->SeqParSet) / sizeof(p_Vid->SeqParSet[0]))
+  {
+    free (buf);
+    return;
+  }
+
+  sps = &p_Vid->SeqParSet[seq_parameter_set_id];
+
+  activate_sps(p_Vid, sps);
+
+#ifdef PRINT_BUFFERING_PERIOD_INFO
+  printf("Buffering period SEI message\n");
+  printf("seq_parameter_set_id = %d\n", seq_parameter_set_id);
+#endif
+
+  // Note: NalHrdBpPresentFlag and CpbDpbDelaysPresentFlag can also be set "by some means not specified in this Recommendation | International Standard"
+  if (sps->vui_parameters_present_flag)
+  {
+
+    if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag)
+    {
+      // one (delay, offset) pair per coded picture buffer
+      for (k=0; k<sps->vui_seq_parameters.nal_hrd_parameters.cpb_cnt_minus1+1; k++)
+      {
+        initial_cpb_removal_delay = u_v(sps->vui_seq_parameters.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay" , buf);
+        initial_cpb_removal_delay_offset = u_v(sps->vui_seq_parameters.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay_offset" , buf);
+
+#ifdef PRINT_BUFFERING_PERIOD_INFO
+        printf("nal initial_cpb_removal_delay[%d] = %d\n", k, initial_cpb_removal_delay);
+        printf("nal initial_cpb_removal_delay_offset[%d] = %d\n", k, initial_cpb_removal_delay_offset);
+#endif
+      }
+    }
+
+    if (sps->vui_seq_parameters.vcl_hrd_parameters_present_flag)
+    {
+      for (k=0; k<sps->vui_seq_parameters.vcl_hrd_parameters.cpb_cnt_minus1+1; k++)
+      {
+        initial_cpb_removal_delay = u_v(sps->vui_seq_parameters.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay" , buf);
+        initial_cpb_removal_delay_offset = u_v(sps->vui_seq_parameters.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay_offset" , buf);
+
+#ifdef PRINT_BUFFERING_PERIOD_INFO
+        printf("vcl initial_cpb_removal_delay[%d] = %d\n", k, initial_cpb_removal_delay);
+        printf("vcl initial_cpb_removal_delay_offset[%d] = %d\n", k, initial_cpb_removal_delay_offset);
+#endif
+      }
+    }
+  }
+
+  free (buf);
+#ifdef PRINT_BUFFERING_PERIOD_INFO
+#undef PRINT_BUFFERING_PERIOD_INFO
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the Picture timing SEI message.
+ *    Field lengths are taken from the HRD parameters of the active SPS;
+ *    the parsed values are printed only when PRINT_PCITURE_TIMING_INFO
+ *    is defined.
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message (used as the bitstream length)
+ * \param p_Vid
+ *    the image pointer; p_Vid->active_sps must be set, otherwise a
+ *    warning is written to stderr and the message is skipped
+ *
+ ************************************************************************
+ */
+void interpret_picture_timing_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+
+  seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+  int cpb_removal_delay, dpb_output_delay, picture_structure_present_flag, picture_structure;
+  int clock_time_stamp_flag;
+  int ct_type, nuit_field_based_flag, counting_type, full_timestamp_flag, discontinuity_flag, cnt_dropped_flag, nframes;
+  int seconds_value, minutes_value, hours_value, seconds_flag, minutes_flag, hours_flag, time_offset;
+  int NumClockTs = 0;
+  int i;
+
+  int cpb_removal_len = 24;
+  int dpb_output_len = 24;
+
+  Boolean CpbDpbDelaysPresentFlag;
+
+  Bitstream* buf;
+
+  if (NULL==active_sps)
+  {
+    fprintf (stderr, "Warning: no active SPS, timing SEI cannot be parsed\n");
+    return;
+  }
+
+  buf = malloc(sizeof(Bitstream));
+  if (NULL == buf)
+    return; // out of memory: skip the message instead of dereferencing NULL
+
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+
+#ifdef PRINT_PCITURE_TIMING_INFO
+  printf("Picture timing SEI message\n");
+#endif
+
+  // CpbDpbDelaysPresentFlag can also be set "by some means not specified in this Recommendation | International Standard"
+  CpbDpbDelaysPresentFlag = (Boolean) (active_sps->vui_parameters_present_flag
+    && ( (active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag != 0)
+    ||(active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag != 0)));
+
+  if (CpbDpbDelaysPresentFlag )
+  {
+    // The flag already implies VUI is present and at least one HRD is signalled,
+    // so "not NAL" means VCL here. NAL HRD lengths take precedence.
+    if (active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag)
+    {
+      cpb_removal_len = active_sps->vui_seq_parameters.nal_hrd_parameters.cpb_removal_delay_length_minus1 + 1;
+      dpb_output_len = active_sps->vui_seq_parameters.nal_hrd_parameters.dpb_output_delay_length_minus1 + 1;
+    }
+    else
+    {
+      cpb_removal_len = active_sps->vui_seq_parameters.vcl_hrd_parameters.cpb_removal_delay_length_minus1 + 1;
+      dpb_output_len = active_sps->vui_seq_parameters.vcl_hrd_parameters.dpb_output_delay_length_minus1 + 1;
+    }
+
+    cpb_removal_delay = u_v(cpb_removal_len, "SEI: cpb_removal_delay" , buf);
+    dpb_output_delay = u_v(dpb_output_len, "SEI: dpb_output_delay" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+    printf("cpb_removal_delay = %d\n",cpb_removal_delay);
+    printf("dpb_output_delay = %d\n",dpb_output_delay);
+#endif
+  }
+
+  if (!active_sps->vui_parameters_present_flag)
+  {
+    picture_structure_present_flag = 0;
+  }
+  else
+  {
+    picture_structure_present_flag = active_sps->vui_seq_parameters.pic_struct_present_flag;
+  }
+
+  if (picture_structure_present_flag)
+  {
+    picture_structure = u_v(4, "SEI: pic_struct" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+    printf("picture_structure = %d\n",picture_structure);
+#endif
+    // pic_struct determines how many clock timestamps follow
+    switch (picture_structure)
+    {
+    case 0:
+    case 1:
+    case 2:
+      NumClockTs = 1;
+      break;
+    case 3:
+    case 4:
+    case 7:
+      NumClockTs = 2;
+      break;
+    case 5:
+    case 6:
+    case 8:
+      NumClockTs = 3;
+      break;
+    default:
+      error("reserved picture_structure used (can't determine NumClockTs)", 500);
+    }
+    for (i=0; i<NumClockTs; i++)
+    {
+      clock_time_stamp_flag = u_1("SEI: clock_time_stamp_flag" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+      printf("clock_time_stamp_flag = %d\n",clock_time_stamp_flag);
+#endif
+      if (clock_time_stamp_flag)
+      {
+        ct_type = u_v(2, "SEI: ct_type" , buf);
+        nuit_field_based_flag = u_1( "SEI: nuit_field_based_flag" , buf);
+        counting_type = u_v(5, "SEI: counting_type" , buf);
+        full_timestamp_flag = u_1( "SEI: full_timestamp_flag" , buf);
+        discontinuity_flag = u_1( "SEI: discontinuity_flag" , buf);
+        cnt_dropped_flag = u_1( "SEI: cnt_dropped_flag" , buf);
+        nframes = u_v(8, "SEI: nframes" , buf);
+
+#ifdef PRINT_PCITURE_TIMING_INFO
+        printf("ct_type = %d\n",ct_type);
+        printf("nuit_field_based_flag = %d\n",nuit_field_based_flag);
+        printf("full_timestamp_flag = %d\n",full_timestamp_flag);
+        printf("discontinuity_flag = %d\n",discontinuity_flag);
+        printf("cnt_dropped_flag = %d\n",cnt_dropped_flag);
+        printf("nframes = %d\n",nframes);
+#endif
+        if (full_timestamp_flag)
+        {
+          seconds_value = u_v(6, "SEI: seconds_value" , buf);
+          minutes_value = u_v(6, "SEI: minutes_value" , buf);
+          hours_value = u_v(5, "SEI: hours_value" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+          printf("seconds_value = %d\n",seconds_value);
+          printf("minutes_value = %d\n",minutes_value);
+          printf("hours_value = %d\n",hours_value);
+#endif
+        }
+        else
+        {
+          // partial timestamp: each unit is only coded when the flag of the finer unit is set
+          seconds_flag = u_1( "SEI: seconds_flag" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+          printf("seconds_flag = %d\n",seconds_flag);
+#endif
+          if (seconds_flag)
+          {
+            seconds_value = u_v(6, "SEI: seconds_value" , buf);
+            minutes_flag = u_1( "SEI: minutes_flag" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+            printf("seconds_value = %d\n",seconds_value);
+            printf("minutes_flag = %d\n",minutes_flag);
+#endif
+            if(minutes_flag)
+            {
+              minutes_value = u_v(6, "SEI: minutes_value" , buf);
+              hours_flag = u_1( "SEI: hours_flag" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+              printf("minutes_value = %d\n",minutes_value);
+              printf("hours_flag = %d\n",hours_flag);
+#endif
+              if(hours_flag)
+              {
+                hours_value = u_v(5, "SEI: hours_value" , buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+                printf("hours_value = %d\n",hours_value);
+#endif
+              }
+            }
+          }
+        }
+        {
+          // time_offset length: VCL HRD first, then NAL HRD, otherwise 24 bits
+          int time_offset_length;
+          if (active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag)
+            time_offset_length = active_sps->vui_seq_parameters.vcl_hrd_parameters.time_offset_length;
+          else if (active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag)
+            time_offset_length = active_sps->vui_seq_parameters.nal_hrd_parameters.time_offset_length;
+          else
+            time_offset_length = 24;
+          if (time_offset_length)
+            time_offset = i_v(time_offset_length, "SEI: time_offset" , buf);
+          else
+            time_offset = 0;
+#ifdef PRINT_PCITURE_TIMING_INFO
+          printf("time_offset = %d\n",time_offset);
+#endif
+        }
+      }
+    }
+  }
+
+  free (buf);
+#ifdef PRINT_PCITURE_TIMING_INFO
+#undef PRINT_PCITURE_TIMING_INFO
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ * Interpret the HDR tone-mapping SEI message (JVT-T060)
+ * \param payload
+ * a pointer that point to the sei payload
+ * \param size
+ * the size of the sei message
+ * \param p_Vid
+ * the image pointer
+ *
+ ************************************************************************
+ */
+// Scratch copy of the syntax elements of one tone-mapping SEI message.
+// interpret_tone_mapping() zero-fills it, parses into it, and (when
+// ENABLE_OUTPUT_TONEMAPPING is set) builds the look-up table from it.
+typedef struct
+{
+ unsigned int tone_map_id;
+ unsigned char tone_map_cancel_flag;
+ unsigned int tone_map_repetition_period;
+ unsigned char coded_data_bit_depth; // read as an 8-bit field; 1<<value gives the number of coded levels
+ unsigned char sei_bit_depth; // read as an 8-bit field; 1<<value gives the number of output levels
+ unsigned int model_id; // selects which of the model-specific groups below is filled
+ // variables for model 0
+ int min_value;
+ int max_value;
+ // variables for model 1
+ int sigmoid_midpoint;
+ int sigmoid_width;
+ // variables for model 2
+ int start_of_coded_interval[1<<MAX_SEI_BIT_DEPTH]; // one entry is parsed per output level (1<<sei_bit_depth)
+ // variables for model 3
+ int num_pivots; // read as a 16-bit field
+ int coded_pivot_value[MAX_NUM_PIVOTS]; // entries 0 and num_pivots+1 are set by the parser, not read from the stream
+ int sei_pivot_value[MAX_NUM_PIVOTS]; // same indexing as coded_pivot_value
+} tone_mapping_struct_tmp;
+
+// Parse one tone-mapping SEI message into a local tone_mapping_struct_tmp and,
+// when ENABLE_OUTPUT_TONEMAPPING is set and tone_map_id == 0, build the
+// look-up table in p_Vid->seiToneMapping.
+//   payload : pointer to the SEI payload
+//   size    : size of the SEI message (used as the bitstream length)
+//   p_Vid   : decoder state that receives the generated table
+// All sizes and indices parsed from the payload are validated before they are
+// used as shift counts or array indices; a message that fails validation is
+// dropped without touching p_Vid.
+void interpret_tone_mapping( byte* payload, int size, VideoParameters *p_Vid )
+{
+  tone_mapping_struct_tmp seiToneMappingTmp;
+  Bitstream* buf;
+  int i = 0, max_coded_num, max_output_num;
+
+  memset (&seiToneMappingTmp, 0, sizeof (tone_mapping_struct_tmp));
+
+  buf = malloc(sizeof(Bitstream));
+  if (NULL == buf)
+    return; // out of memory: skip the message instead of dereferencing NULL
+
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  seiToneMappingTmp.tone_map_id = ue_v("SEI: tone_map_id", buf);
+  seiToneMappingTmp.tone_map_cancel_flag = (unsigned char) u_1("SEI: tone_map_cancel_flag", buf);
+
+#ifdef PRINT_TONE_MAPPING
+  printf("Tone-mapping SEI message\n");
+  printf("tone_map_id = %d\n", seiToneMappingTmp.tone_map_id);
+
+  if (seiToneMappingTmp.tone_map_id != 0)
+    printf("WARNING! Tone_map_id != 0, print the SEI message info only. The tone mapping is actually applied only when Tone_map_id==0\n\n");
+  printf("tone_map_cancel_flag = %d\n", seiToneMappingTmp.tone_map_cancel_flag);
+#endif
+
+  if (!seiToneMappingTmp.tone_map_cancel_flag)
+  {
+    seiToneMappingTmp.tone_map_repetition_period = ue_v( "SEI: tone_map_repetition_period", buf);
+    seiToneMappingTmp.coded_data_bit_depth = (unsigned char)u_v (8,"SEI: coded_data_bit_depth" , buf);
+    seiToneMappingTmp.sei_bit_depth = (unsigned char)u_v (8,"SEI: sei_bit_depth" , buf);
+
+    seiToneMappingTmp.model_id = ue_v( "SEI: model_id" , buf);
+
+#ifdef PRINT_TONE_MAPPING
+    printf("tone_map_repetition_period = %d\n", seiToneMappingTmp.tone_map_repetition_period);
+    printf("coded_data_bit_depth = %d\n", seiToneMappingTmp.coded_data_bit_depth);
+    printf("sei_bit_depth = %d\n", seiToneMappingTmp.sei_bit_depth);
+    printf("model_id = %d\n", seiToneMappingTmp.model_id);
+#endif
+
+    // Both depths are raw 8-bit bitstream values and are used as shift counts below.
+    // H.264 Annex D reserves coded_data_bit_depth values outside 8..14 and tells decoders
+    // to ignore such messages; sei_bit_depth must at least keep 1<<depth defined for int.
+    if (seiToneMappingTmp.coded_data_bit_depth < 8 || seiToneMappingTmp.coded_data_bit_depth > 14 ||
+        seiToneMappingTmp.sei_bit_depth >= (sizeof(int) * 8) - 1)
+    {
+      free (buf);
+      return;
+    }
+
+    max_coded_num = 1<<seiToneMappingTmp.coded_data_bit_depth;
+    max_output_num = 1<<seiToneMappingTmp.sei_bit_depth;
+
+    if (seiToneMappingTmp.model_id == 0)
+    { // linear mapping with clipping
+      seiToneMappingTmp.min_value = u_v (32,"SEI: min_value", buf);
+      seiToneMappingTmp.max_value = u_v (32,"SEI: max_value", buf); // trace label used to say min_value
+#ifdef PRINT_TONE_MAPPING
+      printf("min_value = %d, max_value = %d\n", seiToneMappingTmp.min_value, seiToneMappingTmp.max_value);
+#endif
+    }
+    else if (seiToneMappingTmp.model_id == 1)
+    { // sigmoidal mapping
+      seiToneMappingTmp.sigmoid_midpoint = u_v (32,"SEI: sigmoid_midpoint", buf);
+      seiToneMappingTmp.sigmoid_width = u_v (32,"SEI: sigmoid_width", buf);
+#ifdef PRINT_TONE_MAPPING
+      printf("sigmoid_midpoint = %d, sigmoid_width = %d\n", seiToneMappingTmp.sigmoid_midpoint, seiToneMappingTmp.sigmoid_width);
+#endif
+    }
+    else if (seiToneMappingTmp.model_id == 2)
+    { // user defined table mapping
+      // start_of_coded_interval holds 1<<MAX_SEI_BIT_DEPTH entries; one is parsed per output level
+      if (seiToneMappingTmp.sei_bit_depth > MAX_SEI_BIT_DEPTH)
+      {
+        free (buf);
+        return;
+      }
+      for (i=0; i<max_output_num; i++)
+      {
+        seiToneMappingTmp.start_of_coded_interval[i] = u_v((((seiToneMappingTmp.coded_data_bit_depth+7)>>3)<<3), "SEI: start_of_coded_interval" , buf);
+#ifdef PRINT_TONE_MAPPING // too long to print
+        //printf("start_of_coded_interval[%d] = %d\n", i, seiToneMappingTmp.start_of_coded_interval[i]);
+#endif
+      }
+    }
+    else if (seiToneMappingTmp.model_id == 3)
+    { // piece-wise linear mapping
+      seiToneMappingTmp.num_pivots = u_v (16,"SEI: num_pivots", buf);
+#ifdef PRINT_TONE_MAPPING
+      printf("num_pivots = %d\n", seiToneMappingTmp.num_pivots);
+#endif
+      // indices 0 .. num_pivots+1 of the pivot arrays (MAX_NUM_PIVOTS entries each) are written below
+      if (seiToneMappingTmp.num_pivots < 0 || seiToneMappingTmp.num_pivots + 1 >= MAX_NUM_PIVOTS)
+      {
+        free (buf);
+        return;
+      }
+      seiToneMappingTmp.coded_pivot_value[0] = 0;
+      seiToneMappingTmp.sei_pivot_value[0] = 0;
+      seiToneMappingTmp.coded_pivot_value[seiToneMappingTmp.num_pivots+1] = max_coded_num-1;
+      seiToneMappingTmp.sei_pivot_value[seiToneMappingTmp.num_pivots+1] = max_output_num-1;
+
+      for (i=1; i < seiToneMappingTmp.num_pivots+1; i++)
+      {
+        seiToneMappingTmp.coded_pivot_value[i] = u_v( (((seiToneMappingTmp.coded_data_bit_depth+7)>>3)<<3), "SEI: coded_pivot_value", buf);
+        seiToneMappingTmp.sei_pivot_value[i] = u_v( (((seiToneMappingTmp.sei_bit_depth+7)>>3)<<3), "SEI: sei_pivot_value", buf);
+#ifdef PRINT_TONE_MAPPING
+        printf("coded_pivot_value[%d] = %d, sei_pivot_value[%d] = %d\n", i, seiToneMappingTmp.coded_pivot_value[i], i, seiToneMappingTmp.sei_pivot_value[i]);
+#endif
+      }
+    }
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+    // Currently, only when the map_id == 0, the tone-mapping is actually applied.
+    if (seiToneMappingTmp.tone_map_id== 0)
+    {
+      int j;
+      // Every table write below is confined to [0, max_coded_num).
+      // NOTE(review): the capacity of seiToneMapping->lut is not visible here; confirm it holds
+      // at least 1<<14 entries (largest accepted coded_data_bit_depth) or tighten the check above.
+      imgpel *lut = p_Vid->seiToneMapping->lut;
+
+      p_Vid->seiToneMapping->seiHasTone_mapping = TRUE;
+      p_Vid->seiToneMapping->tone_map_repetition_period = seiToneMappingTmp.tone_map_repetition_period;
+      p_Vid->seiToneMapping->coded_data_bit_depth = seiToneMappingTmp.coded_data_bit_depth;
+      p_Vid->seiToneMapping->sei_bit_depth = seiToneMappingTmp.sei_bit_depth;
+      p_Vid->seiToneMapping->model_id = seiToneMappingTmp.model_id;
+      p_Vid->seiToneMapping->count = 0;
+
+      // generate the look up table of tone mapping
+      switch(seiToneMappingTmp.model_id)
+      {
+      case 0: // linear mapping with clipping
+        {
+          // min/max are unvalidated 32-bit stream values: classify each table index instead of
+          // iterating from them, and do the interpolation in 64 bits so it cannot overflow.
+          long long lo = seiToneMappingTmp.min_value;
+          long long hi = seiToneMappingTmp.max_value;
+
+          for (i=0; i<max_coded_num; i++)
+          {
+            if (i >= hi)
+              lut[i] = (imgpel) (max_output_num - 1);
+            else if (i <= lo)
+              lut[i] = 0;
+            else // lo < i < hi, hence hi - lo > 0
+              lut[i] = (imgpel) ((i - lo) * (max_output_num - 1) / (hi - lo));
+          }
+        }
+        break;
+      case 1: // sigmoid mapping
+
+        for (i=0; i < max_coded_num; i++)
+        {
+          double delta = (double) i - (double) seiToneMappingTmp.sigmoid_midpoint;
+          double tmp;
+
+          if (seiToneMappingTmp.sigmoid_width != 0)
+            tmp = 1.0 + exp( -6.0 * delta / seiToneMappingTmp.sigmoid_width);
+          else if (delta > 0.0)
+            tmp = 1.0; // zero width: step function, upper plateau
+          else if (delta == 0.0)
+            tmp = 2.0; // zero width: midpoint itself (avoids 0/0)
+          else
+          {
+            lut[i] = 0; // zero width: lower plateau
+            continue;
+          }
+          lut[i] = (imgpel)( (double)(max_output_num-1)/ tmp + 0.5);
+        }
+        break;
+      case 2: // user defined table
+        if (0 < max_output_num-1)
+        {
+          for (j=0; j<max_output_num-1; j++)
+          {
+            int first = seiToneMappingTmp.start_of_coded_interval[j];
+            int end = seiToneMappingTmp.start_of_coded_interval[j+1];
+
+            if (first < 0)
+              first = 0;
+            if (end > max_coded_num)
+              end = max_coded_num;
+            for (i=first; i<end; i++)
+            {
+              lut[i] = (imgpel) j;
+            }
+          }
+          // i is left where the last interval stopped, as in the unclamped loop
+          if (i >= 0 && i < max_coded_num)
+            lut[i] = (imgpel) (max_output_num - 1);
+        }
+        break;
+      case 3: // piecewise linear mapping
+        for (j=0; j<seiToneMappingTmp.num_pivots+1; j++)
+        {
+          int span = seiToneMappingTmp.coded_pivot_value[j+1]-seiToneMappingTmp.coded_pivot_value[j];
+          // two pivots on the same coded value would divide by zero; the segment is then a single point
+          double slope = (span != 0) ? (double)(seiToneMappingTmp.sei_pivot_value[j+1] - seiToneMappingTmp.sei_pivot_value[j])/span : 0.0;
+          int first = seiToneMappingTmp.coded_pivot_value[j];
+          int last = seiToneMappingTmp.coded_pivot_value[j+1];
+
+          if (first < 0)
+            first = 0;
+          if (last > max_coded_num - 1)
+            last = max_coded_num - 1;
+          for (i=first; i <= last; i++)
+          {
+            lut[i] = (imgpel) (seiToneMappingTmp.sei_pivot_value[j] + (int)(( (i - seiToneMappingTmp.coded_pivot_value[j]) * slope)));
+          }
+        }
+        break;
+
+      default:
+        break;
+      } // end switch
+    }
+#endif
+  } // end !tone_map_cancel_flag
+  free (buf);
+}
+
+#if (ENABLE_OUTPUT_TONEMAPPING)
+// tone map using the look-up-table generated according to SEI tone mapping message
+// Replace every sample of a size_y x size_x plane, in place, by its entry in
+// the tone-mapping look-up table (the sample value is the table index).
+void tone_map (imgpel** imgX, imgpel* lut, int size_x, int size_y)
+{
+  int row, col;
+
+  for (row = 0; row < size_y; row++)
+  {
+    for (col = 0; col < size_x; col++)
+    {
+      imgpel coded = imgX[row][col];
+      imgX[row][col] = (imgpel)lut[coded];
+    }
+  }
+}
+
+// Put the tone-mapping state into its "no mapping active" condition.
+void init_tone_mapping_sei(ToneMappingSEI *seiToneMapping)
+{
+  seiToneMapping->count = 0;
+  seiToneMapping->seiHasTone_mapping = FALSE;
+}
+
+// Advance the persistence bookkeeping of the current tone mapping by one call.
+//   repetition period 0  : the mapping is dropped immediately
+//   repetition period 1  : nothing changes
+//   repetition period >1 : count is incremented and the mapping is dropped
+//                          once count reaches the period
+void update_tone_mapping_sei(ToneMappingSEI *seiToneMapping)
+{
+  int expired = 0;
+
+  if (seiToneMapping->tone_map_repetition_period == 0)
+  {
+    expired = 1;
+  }
+  else if (seiToneMapping->tone_map_repetition_period>1)
+  {
+    seiToneMapping->count++;
+    if (seiToneMapping->count>=seiToneMapping->tone_map_repetition_period)
+      expired = 1;
+  }
+
+  if (expired)
+  {
+    seiToneMapping->seiHasTone_mapping = FALSE;
+    seiToneMapping->count = 0;
+  }
+}
+#endif
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Interpret the post filter hints SEI message (JVT-U035).
+ *    The hints are parsed into a temporary 3 x size_y x size_x array,
+ *    optionally printed (PRINT_POST_FILTER_HINT_INFO), and released again.
+ * \param payload
+ *    a pointer to the SEI payload
+ * \param size
+ *    the size of the SEI message (used as the bitstream length)
+ * \param p_Vid
+ *    the image pointer (not referenced by this function)
+ *
+ ************************************************************************
+ */
+void interpret_post_filter_hints_info( byte* payload, int size, VideoParameters *p_Vid )
+{
+  Bitstream* buf;
+  unsigned int filter_hint_size_y, filter_hint_size_x, filter_hint_type, color_component, cx, cy, additional_extension_flag;
+  int ***filter_hint;
+
+  buf = malloc(sizeof(Bitstream));
+  if (NULL == buf)
+    return; // out of memory: skip the message instead of dereferencing NULL
+
+  buf->bitstream_length = size;
+  buf->streamBuffer = payload;
+  buf->frame_bitoffset = 0;
+
+  filter_hint_size_y = ue_v("SEI: filter_hint_size_y", buf); // interpret post-filter hint SEI here
+  filter_hint_size_x = ue_v("SEI: filter_hint_size_x", buf); // interpret post-filter hint SEI here
+  filter_hint_type = u_v(2, "SEI: filter_hint_type", buf); // interpret post-filter hint SEI here
+
+  // The sizes come from the bitstream and drive the allocation below. H.264 Annex D
+  // restricts both to 1..15, so anything else is a corrupt message and is dropped
+  // rather than turned into an arbitrarily large (or empty) allocation.
+  if (filter_hint_size_y < 1 || filter_hint_size_y > 15 ||
+      filter_hint_size_x < 1 || filter_hint_size_x > 15)
+  {
+    free( buf );
+    return;
+  }
+
+  get_mem3Dint (&filter_hint, 3, filter_hint_size_y, filter_hint_size_x);
+
+  for (color_component = 0; color_component < 3; color_component ++)
+    for (cy = 0; cy < filter_hint_size_y; cy ++)
+      for (cx = 0; cx < filter_hint_size_x; cx ++)
+        filter_hint[color_component][cy][cx] = se_v("SEI: filter_hint", buf); // interpret post-filter hint SEI here
+
+  additional_extension_flag = u_1("SEI: additional_extension_flag", buf); // interpret post-filter hint SEI here
+
+#ifdef PRINT_POST_FILTER_HINT_INFO
+  printf(" Post-filter hint SEI message\n");
+  printf(" post_filter_hint_size_y %d \n", filter_hint_size_y);
+  printf(" post_filter_hint_size_x %d \n", filter_hint_size_x);
+  printf(" post_filter_hint_type %d \n", filter_hint_type);
+  for (color_component = 0; color_component < 3; color_component ++)
+    for (cy = 0; cy < filter_hint_size_y; cy ++)
+      for (cx = 0; cx < filter_hint_size_x; cx ++)
+        printf(" post_filter_hint[%d][%d][%d] %d \n", color_component, cy, cx, filter_hint[color_component][cy][cx]);
+
+  printf(" additional_extension_flag %d \n", additional_extension_flag);
+
+#undef PRINT_POST_FILTER_HINT_INFO
+#endif
+
+  free_mem3Dint (filter_hint);
+  free( buf );
+}
diff --git a/Src/h264dec/ldecod/src/storable_picture.c b/Src/h264dec/ldecod/src/storable_picture.c
new file mode 100644
index 00000000..c12b68b3
--- /dev/null
+++ b/Src/h264dec/ldecod/src/storable_picture.c
@@ -0,0 +1,287 @@
+#include "global.h"
+#include "mbuffer.h"
+#include "memalloc.h"
+
+// Allocate the motion storage of one picture. size_y and size_x are passed by
+// the caller already divided by BLOCK_SIZE.
+// The two motion planes are only allocated when they are still NULL, so the
+// caller must hand in a zero-initialised PicMotionParams.
+static void alloc_pic_motion(VideoParameters *p_Vid, PicMotionParams *motion, int size_y, int size_x)
+{
+  // TODO: benski> re-use memory just like for image data
+  seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+  // field references are only allocated when the stream is not frame-MBs-only
+  if (active_sps->frame_mbs_only_flag)
+  {
+    motion->field_references = 0; // just in case
+  }
+  else
+  {
+    get_mem3Dref(&(motion->field_references), 4, size_y, size_x);
+  }
+
+  // try the motion cache first; fall back to a fresh allocation per list
+  if (motion_cache_dimensions_match(&p_Vid->motion_cache, size_x, size_y))
+  {
+    motion->motion[LIST_0]=motion_cache_get(&p_Vid->motion_cache);
+    motion->motion[LIST_1]=motion_cache_get(&p_Vid->motion_cache);
+  }
+  if (NULL == motion->motion[LIST_0])
+  {
+    get_mem2DPicMotion(&(motion->motion[LIST_0]), size_y, size_x);
+  }
+  if (NULL == motion->motion[LIST_1])
+  {
+    get_mem2DPicMotion(&(motion->motion[LIST_1]), size_y, size_x);
+  }
+
+  motion->mb_field = calloc (size_y * size_x, sizeof(byte));
+  if (NULL == motion->mb_field)
+  {
+    no_mem_exit("alloc_storable_picture: motion->mb_field");
+  }
+
+  get_mem2D (&(motion->field_frame), size_y, size_x);
+}
+
+// Release everything alloc_pic_motion() set up and reset the pointers.
+// size_x / size_y are divided by BLOCK_SIZE here for the cache comparison;
+// when they match, the motion planes go back to the motion cache instead of
+// being freed.
+void free_pic_motion(VideoParameters *p_Vid, PicMotionParams *motion, int size_x, int size_y)
+{
+  if (motion->motion[LIST_0] != NULL)
+  {
+    const int reusable = motion_cache_dimensions_match(&p_Vid->motion_cache, size_x / BLOCK_SIZE, size_y / BLOCK_SIZE);
+
+    if (!reusable)
+    {
+      free_mem2DPicMotion(motion->motion[LIST_0]);
+      free_mem2DPicMotion(motion->motion[LIST_1]);
+    }
+    else
+    {
+      motion_cache_add(&p_Vid->motion_cache,motion->motion[LIST_0]);
+      motion_cache_add(&p_Vid->motion_cache,motion->motion[LIST_1]);
+    }
+    motion->motion[LIST_0] = NULL;
+    motion->motion[LIST_1] = NULL;
+  }
+
+  if (motion->field_references != 0)
+  {
+    free_mem3Dref(motion->field_references);
+    motion->field_references=0;
+  }
+
+  if (motion->mb_field != NULL)
+  {
+    free(motion->mb_field);
+    motion->mb_field = NULL;
+  }
+
+  if (motion->field_frame != NULL)
+  {
+    free_mem2D (motion->field_frame);
+    motion->field_frame=NULL;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free picture memory. Image planes whose dimensions match the
+ *    corresponding image cache are returned to that cache instead of
+ *    being freed.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param p
+ *    Picture to be freed (NULL is ignored)
+ *
+ ************************************************************************
+ */
+static void internal_free_storable_picture(VideoParameters *p_Vid, StorablePicture* p)
+{
+  int nplane;
+
+  if (!p)
+    return;
+
+  free_pic_motion(p_Vid, &p->motion, p->size_x, p->size_y);
+
+  //if( IS_INDEPENDENT(p_Vid) )
+  {
+    // free_pic_motion only touches non-NULL members, so this is safe even when
+    // the per-plane motion was never allocated
+    for( nplane=0; nplane<MAX_PLANE; nplane++ )
+    {
+      free_pic_motion(p_Vid, &p->JVmotion[nplane], p->size_x, p->size_y);
+    }
+  }
+
+  // A plane pointer is NULL when it was never allocated (alloc_storable_picture skips
+  // the chroma planes for YUV400); don't hand NULL to the cache or the deallocator.
+  if (p->imgY)
+  {
+    if (image_cache_dimensions_match(&p_Vid->image_cache[0], p->size_x, p->size_y))
+      image_cache_add(&p_Vid->image_cache[0], p->imgY);
+    else
+      free_memImage(p->imgY);
+  }
+
+  for (nplane = 0; nplane < 2; nplane++)
+  {
+    if (!p->imgUV[nplane])
+      continue;
+    if (image_cache_dimensions_match(&p_Vid->image_cache[1], p->size_x_cr, p->size_y_cr))
+      image_cache_add(&p_Vid->image_cache[1], p->imgUV[nplane]);
+    else
+      free_memImage(p->imgUV[nplane]);
+  }
+
+  if (p->slice_id)
+  {
+    free_mem2Dshort(p->slice_id);
+    p->slice_id=NULL;
+  }
+
+  if (p->seiHasTone_mapping)
+    free(p->tone_mapping_lut);
+
+  _aligned_free(p);
+}
+
+// Drop one reference to the picture; the memory is released when the retain
+// count reaches zero. NULL is ignored.
+void free_storable_picture(VideoParameters *p_Vid, StorablePicture* p)
+{
+  if (!p)
+    return;
+
+  if (--p->retain_count == 0)
+    internal_free_storable_picture(p_Vid, p);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for a stored picture. The picture starts with a
+ *    retain count of 1; image planes and motion data are taken from the
+ *    decoder's caches when the dimensions match.
+ *
+ * \param p_Vid
+ *    image decoding parameters for current picture
+ * \param structure
+ *    picture structure (for anything but FRAME the vertical sizes are halved)
+ * \param size_x
+ *    horizontal luma size
+ * \param size_y
+ *    vertical luma size
+ * \param size_x_cr
+ *    horizontal chroma size
+ * \param size_y_cr
+ *    vertical chroma size
+ *
+ * \return
+ *    the allocated StorablePicture structure, or 0 when the structure
+ *    itself could not be allocated
+ ************************************************************************
+ */
+#define ROUNDUP32(size) (((size)+31) & ~31)
+
+StorablePicture* alloc_storable_picture(VideoParameters *p_Vid, PictureStructure structure, int size_x, int size_y, int size_x_cr, int size_y_cr)
+{
+  seq_parameter_set_rbsp_t *active_sps = p_Vid->active_sps;
+
+  StorablePicture *pic;
+  int plane;
+
+  //printf ("Allocating (%s) picture (x=%d, y=%d, x_cr=%d, y_cr=%d)\n", (type == FRAME)?"FRAME":(type == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", size_x, size_y, size_x_cr, size_y_cr);
+  pic = _aligned_malloc(sizeof(StorablePicture), 32);
+  if (pic == NULL)
+    return 0;
+  // zero-fill: the cache/allocation fallbacks below test the plane pointers for NULL
+  memset(pic, 0, sizeof(StorablePicture));
+
+  pic->retain_count = 1;
+  pic->time_code = (uint64_t)-666;
+
+  // a field picture has half the rows of a frame
+  if (structure!=FRAME)
+  {
+    size_y /= 2;
+    size_y_cr /= 2;
+  }
+
+  // plain bookkeeping first, allocations afterwards
+  pic->PicSizeInMbs = (size_x*size_y)/256;
+  pic->structure=structure;
+
+  pic->size_x = size_x;
+  pic->size_x_m1 = size_x - 1;
+  pic->size_y = size_y;
+  pic->size_y_m1 = size_y - 1;
+  pic->size_x_cr = size_x_cr;
+  pic->size_x_cr_m1 = size_x_cr - 1;
+  pic->size_y_cr = size_y_cr;
+  pic->size_y_cr_m1 = size_y_cr - 1;
+
+  pic->top_field = p_Vid->no_reference_picture;
+  pic->bottom_field = p_Vid->no_reference_picture;
+  pic->frame = p_Vid->no_reference_picture;
+
+  // luma plane: cached image if available, fresh allocation otherwise
+  if (image_cache_dimensions_match(&p_Vid->image_cache[0], size_x, size_y))
+    pic->imgY = image_cache_get(&p_Vid->image_cache[0]);
+  if (pic->imgY == NULL)
+    pic->imgY = get_memImage(size_x, size_y);
+
+  if (active_sps->chroma_format_idc != YUV400)
+  {
+    if (image_cache_dimensions_match(&p_Vid->image_cache[1], size_x_cr, size_y_cr))
+    {
+      pic->imgUV[0] = image_cache_get(&p_Vid->image_cache[1]);
+      pic->imgUV[1] = image_cache_get(&p_Vid->image_cache[1]);
+    }
+
+    // NOTE(review): the fallback allocates chroma with the luma height (size_y), not
+    // size_y_cr, while the cache is keyed on size_y_cr - confirm whether that is intended.
+    if (pic->imgUV[0] == NULL)
+      pic->imgUV[0] = get_memImage(size_x_cr, size_y);
+    if (pic->imgUV[1] == NULL)
+      pic->imgUV[1] = get_memImage(size_x_cr, size_y);
+  }
+
+  get_mem2Dshort (&(pic->slice_id), size_y / MB_BLOCK_SIZE, size_x / MB_BLOCK_SIZE);
+
+  alloc_pic_motion(p_Vid, &pic->motion, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+
+  // per-plane motion data is only allocated when IS_INDEPENDENT(p_Vid) holds
+  if( IS_INDEPENDENT(p_Vid) )
+  {
+    for( plane=0; plane<MAX_PLANE; plane++ )
+    {
+      alloc_pic_motion(p_Vid, &pic->JVmotion[plane], size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+    }
+  }
+
+  return pic;
+}
+
+// Append a picture to the output queue and take a reference on it.
+// When the queue is full, the oldest queued picture is removed and released
+// first. Nothing happens when the queue was never allocated, has zero
+// capacity, or pic is NULL.
+void out_storable_picture_add(VideoParameters *img, StorablePicture *pic)
+{
+  // a zero-capacity queue has no slot 0 to write to, and a NULL picture cannot be retained
+  if (!img->out_pictures || img->size_out_pictures == 0 || !pic)
+    return;
+
+  // see if we're full
+  if (img->size_out_pictures == img->num_out_pictures)
+  {
+    StorablePicture *oldest=0; // distinct name: the original shadowed the 'pic' parameter here
+    out_storable_picture_get(img, &oldest);
+    if (oldest)
+      free_storable_picture(img, oldest);
+  }
+
+  img->out_pictures[img->num_out_pictures++] = pic;
+  pic->retain_count++;
+}
+
+// Remove the oldest picture from the output queue and hand it to the caller
+// through *pic (set to 0 when the queue is missing or empty). The retain
+// count is not changed here, so the queue's reference moves to the caller.
+void out_storable_picture_get(VideoParameters *img, StorablePicture **pic)
+{
+  *pic = 0;
+
+  if (!img->out_pictures || !img->num_out_pictures)
+    return;
+
+  *pic = img->out_pictures[0];
+  img->num_out_pictures--;
+  // close the gap left by the removed head entry
+  memmove(img->out_pictures, img->out_pictures + 1, img->num_out_pictures * sizeof(StorablePicture *));
+}
+
+// Create an empty output queue with room for 'count' pictures.
+// If the allocation fails the queue is left unallocated with capacity 0, so
+// the recorded capacity never describes memory that does not exist.
+void out_storable_pictures_init(VideoParameters *img, size_t count)
+{
+  // calloc takes (element count, element size)
+  img->out_pictures = (StorablePicture **)calloc(count, sizeof(StorablePicture *));
+  img->size_out_pictures = img->out_pictures ? count : 0;
+  img->num_out_pictures = 0;
+}
+
+// Release every picture still queued for output, then the queue itself.
+void out_storable_pictures_destroy(VideoParameters *img)
+{
+  // the extra out_pictures test keeps the loop from spinning if the count is
+  // non-zero while the queue is missing (out_storable_picture_get would do nothing)
+  while (img->out_pictures && img->num_out_pictures)
+  {
+    StorablePicture *pic=0;
+    out_storable_picture_get(img, &pic);
+    if (pic)
+      free_storable_picture(img, pic);
+  }
+  free(img->out_pictures);
+  img->out_pictures = 0;
+  img->size_out_pictures = 0;
+  img->num_out_pictures = 0;
+}
+
diff --git a/Src/h264dec/ldecod/src/strength_horiz.c b/Src/h264dec/ldecod/src/strength_horiz.c
new file mode 100644
index 00000000..bd719deb
--- /dev/null
+++ b/Src/h264dec/ldecod/src/strength_horiz.c
@@ -0,0 +1,659 @@
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+#include "loopfilter.h"
+
+// Fills Strength[0..MB_BLOCK_SIZE-1] for one edge of macroblock MbQ in the
+// "dir == 1" direction. Every run of BLOCK_SIZE entries receives one value:
+//   4 or 3 : SP/SI slice, or MbP or MbQ is I4MB/I8MB/I16MB/IPCM
+//            (4 only when edge == 0 and p->structure == FRAME)
+//   2      : a cbp_blk[0] bit is set for either block adjoining the edge
+//   1 or 0 : derived from the reference picture ids and motion vectors stored
+//            in p->motion for the two blocks
+// mvlimit is the threshold applied to mv[1] differences; mv[0] differences are
+// always compared against 4.
+void GetStrengthNormal_Horiz(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p)
+{
+ // dir == 1
+ PixelPos pixMB;
+ byte StrValue;
+ Macroblock *MbP;
+
+ assert(NUM_SLICE_TYPES == 5); // the next line assumes this
+ if (p->slice_type>=SP_SLICE) //(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) )
+ {
+ // Set strength to either 3 or 4 regardless of pixel position
+ StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3;
+ memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte));
+ }
+ else
+ {
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ // coordinate inside MbQ handed to the neighbour lookup (edge - 1, so -1 for edge 0)
+ int yQ = edge < 16 ? edge - 1: 0;
+
+ p_Vid->getNeighbour0X(MbQ, yQ, p_Vid->mb_size[IS_LUMA], &pixMB);
+
+ // macroblock that contains the neighbouring position
+ MbP = &(p_Vid->mb_data[pixMB.mb_addr]);
+
+ if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM))
+ {
+ PicMotionParams *motion = &p->motion;
+ h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1;
+ int blkP, blkQ, idx;
+ int blk_x, blk_y ;
+ int posx;
+
+ PicMotion **motion0 = motion->motion[LIST_0];
+ PicMotion **motion1 = motion->motion[LIST_1];
+ short mb_x, mb_y;
+ const int blk_y2 = pixMB.pos_y >> 2;
+ int cbp_pq, cbp_p, cbp_q;
+
+ posx = pixMB.pos_x >> 2;
+ blkP = (pixMB.y & 0xFFFC);
+ blkQ = ((yQ+1) & 0xFFFC);
+
+ // NOTE: cbp_p holds MbQ's bits and cbp_q holds MbP's bits; each is shifted by
+ // its own block index below, so the combined mask is unaffected by the naming.
+ cbp_p = (int)MbQ->cbp_blk[0];
+ cbp_q = (int)MbP->cbp_blk[0];
+ // low 4 bits: one flag per 4-sample group along the edge, set when either side has its bit set
+ cbp_pq = (((cbp_p >> blkQ) & 0xF) | ((cbp_q >> blkP) & 0xF));
+ if (cbp_pq == 0xF)
+ {
+ // every group has a set bit: the whole edge gets 2, skip the motion checks
+ memset(Strength, 2, 16);
+ return;
+ //StrValue = 2;
+ }
+
+ p_Vid->get_mb_block_pos (p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+ // scale macroblock coordinates by 4 to index the motion arrays
+ mb_x <<= 2;
+ mb_y <<= 2;
+
+ blk_x = mb_x + (blkQ & 3);
+ blk_y = mb_y + (blkQ >> 2);
+
+ // one iteration per group of BLOCK_SIZE entries; cbp_pq is shifted so bit 0 is the current group
+ for( idx = 0 ; idx < MB_BLOCK_SIZE ; idx += BLOCK_SIZE, posx++, blkP++, blkQ++, blk_x++, cbp_pq>>=1)
+ {
+ if (cbp_pq & 1)
+ StrValue = 2;
+ else
+ {
+ PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+
+ // *_p* entries are indexed from MbQ's position, *_q* entries from the neighbour position;
+ // suffix 0/1 selects LIST_0/LIST_1
+ motion_p0=&motion0[blk_y ][blk_x ];
+ motion_q0=&motion0[blk_y2][posx];
+ motion_p1=&motion1[blk_y ][blk_x ];
+ motion_q1=&motion1[blk_y2][posx];
+ // if no coefs, but vector difference >= 1 set Strength=1
+ // if this is a mixed mode edge then one set of reference pictures will be frame and the
+ // other will be field
+ // a negative ref_idx is mapped to UNDEFINED_REFERENCE
+ ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+ ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+ ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+ ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+ // both sides use the same pair of references, in the same or in swapped list order
+ if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0)))
+ {
+ // L0 and L1 reference pictures of p0 are different; q0 as well
+ if (ref_p0 != ref_p1)
+ {
+ // compare MV for the same reference picture
+ if (ref_p0 == ref_q0)
+ {
+ if (ref_p0 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_1 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ else if (ref_p1 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_0 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ else
+ {
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ {
+ // references match in swapped list order: compare L0 against L1
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ { // L0 and L1 reference pictures of p0 are the same; q0 as well
+ // result is 1 only when both the straight and the swapped pairing show a difference
+ StrValue = (byte) (
+ ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+ &&
+ ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+ }
+ }
+ else
+ {
+ // the two sides reference different pictures
+ StrValue = 1;
+ }
+ }
+ memset(&Strength[idx], (byte) StrValue, BLOCK_SIZE * sizeof(byte));
+ }
+ }
+ else
+ {
+ // Start with Strength=3. or Strength=4 for Mb-edge
+ StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3;
+ memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte));
+ }
+ }
+}
+
+
+// Variant of the "dir == 1" strength computation that writes one value per
+// group (Strength[0..3]) and receives the neighbour position (pixMB) and the
+// neighbouring macroblock (MbP) from the caller instead of looking them up.
+// Values written:
+//   4 or 3 : SP/SI slice, or MbP or MbQ is I4MB/I8MB/I16MB/IPCM
+//            (4 only when edge == 0 and p->structure == FRAME)
+//   2      : a cbp_blk[0] bit is set for either block adjoining the edge
+//   1 or 0 : derived from reference picture ids and motion vectors in p->motion
+// mvlimit is the threshold for mv[1] differences; mv[0] differences use 4.
+void GetStrength_Horiz_YUV420(byte Strength[4], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP)
+{
+ // dir == 1
+ byte StrValue;
+
+ assert(NUM_SLICE_TYPES == 5); // the next line assumes this
+ if (p->slice_type>=SP_SLICE) //(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) )
+ {
+ // Set strength to either 3 or 4 regardless of pixel position
+ StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3;
+ memset(&Strength[0], (byte) StrValue, 4 * sizeof(byte));
+ }
+ else
+ {
+ // p_Vid is only read inside the _DEBUG section below
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ int yQ = edge < 16 ? edge - 1: 0;
+
+ if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM))
+ {
+ PicMotionParams *motion = &p->motion;
+ h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1;
+ int blkP, blkQ, idx;
+ int posx;
+
+ PicMotion **motion0 = motion->motion[LIST_0];
+ PicMotion **motion1 = motion->motion[LIST_1];
+
+ const int blk_y2 = pixMB.pos_y >> 2;
+ int cbp_pq, cbp_p, cbp_q;
+
+ blkP = (pixMB.y & 0xFFFC);
+ blkQ = ((yQ+1) & 0xFFFC);
+
+ // NOTE: cbp_p holds MbQ's bits and cbp_q holds MbP's bits; each is shifted by
+ // its own block index below, so the combined mask is unaffected by the naming.
+ cbp_p = (int)MbQ->cbp_blk[0];
+ cbp_q = (int)MbP->cbp_blk[0];
+ // low 4 bits: one flag per output entry, set when either side has its bit set
+ cbp_pq = (((cbp_p >> blkQ) & 0xF) | ((cbp_q >> blkP) & 0xF));
+ if (cbp_pq == 0xF)
+ {
+ // all four entries are 2: skip the motion checks
+ memset(Strength, 2, 4);
+ return;
+ //StrValue = 2;
+ }
+ posx = pixMB.pos_x >> 2;
+#ifdef _DEBUG
+ {
+ // debug-only check that the row blk_y2+1 / column posx used below match MbQ's own block position
+ short mb_x, mb_y;
+ get_mb_block_pos_normal(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+ assert((mb_x << 2) == posx);
+ assert(((mb_y << 2) + (blkQ >> 2)) == (blk_y2+1));
+ }
+#endif
+ //blk_y = mb_y + (blkQ >> 2);
+
+ // four iterations (idx = 0, 4, 8, 12); result goes to Strength[idx/4]
+ for( idx = 0 ; idx < MB_BLOCK_SIZE ; idx += BLOCK_SIZE, posx++, cbp_pq>>=1)
+ {
+ if (cbp_pq & 1)
+ StrValue = 2;
+ else
+ {
+ PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+
+
+ // *_p* entries come from row blk_y2+1, *_q* entries from row blk_y2 (same column);
+ // suffix 0/1 selects LIST_0/LIST_1
+ motion_p0=&motion0[blk_y2+1][posx];
+ motion_q0=&motion0[blk_y2][posx];
+ motion_p1=&motion1[blk_y2+1][posx];
+ motion_q1=&motion1[blk_y2][posx];
+
+ // if no coefs, but vector difference >= 1 set Strength=1
+ // if this is a mixed mode edge then one set of reference pictures will be frame and the
+ // other will be field
+ // a negative ref_idx is mapped to UNDEFINED_REFERENCE
+ ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+ ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+ ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+ ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+ // both sides use the same pair of references, in the same or in swapped list order
+ if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0)))
+ {
+ // L0 and L1 reference pictures of p0 are different; q0 as well
+ if (ref_p0 != ref_p1)
+ {
+ // compare MV for the same reference picture
+ if (ref_p0 == ref_q0)
+ {
+ if (ref_p0 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_1 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ else if (ref_p1 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_0 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ else
+ {
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ {
+ // references match in swapped list order: compare L0 against L1
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ { // L0 and L1 reference pictures of p0 are the same; q0 as well
+ // result is 1 only when both the straight and the swapped pairing show a difference
+ StrValue = (byte) (
+ ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+ &&
+ ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+ }
+ }
+ else
+ {
+ // the two sides reference different pictures
+ StrValue = 1;
+ }
+ }
+ Strength[idx/4] = StrValue;
+ //memset(&Strength[idx/4], (byte) StrValue, sizeof(byte));
+ }
+ }
+ else
+ {
+ // Start with Strength=3. or Strength=4 for Mb-edge
+ StrValue = (edge == 0 && p->structure==FRAME) ? 4 : 3;
+ memset(&Strength[0], (byte) StrValue, 4 * sizeof(byte));
+ }
+ }
+}
+
+// MBAFF variant of the "dir == 1" strength computation. Writes one value per
+// position idx = 0..15 into Strength[idx]; the neighbour of each position is
+// looked up individually with getAffNeighbourPXLumaNB().
+// Values written:
+//   4 or 3 : SP/SI slice, or MbP or MbQ is I4MB/I16MB/I8MB/IPCM
+//            (4 only when edge == 0 and neither macroblock has mb_field set)
+//   2      : a cbp_blk[0] bit is set for either adjoining block
+//   1      : MbP and MbQ differ in mb_field, or they use different references
+//   1 or 0 : otherwise, from the motion vector differences
+// Side effect: p_Vid->mixedModeEdgeFlag is overwritten on every iteration.
+// mvlimit is the threshold for mv[1] differences; mv[0] differences use 4.
+void GetStrengthMBAff_Horiz_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p)
+{
+ // dir == 1
+ short blkP, blkQ, idx;
+ short blk_x, blk_x2, blk_y, blk_y2 ;
+ h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1;
+ int xQ, yQ;
+ short mb_x, mb_y;
+ Macroblock *MbP;
+
+ PixelPos pixP;
+ // dir_m1 is not read anywhere in this function
+ int dir_m1 = 0;
+
+ PicMotionParams *motion = &p->motion;
+ PicMotion **motion0 = motion->motion[LIST_0];
+ PicMotion **motion1 = motion->motion[LIST_1];
+ // any edge value >= MB_BLOCK_SIZE is replaced by 1
+ yQ = (edge < MB_BLOCK_SIZE ? edge : 1);
+
+ for( idx = 0; idx < 16; ++idx )
+ {
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ xQ = idx;
+
+ // neighbour of position (xQ, yQ - 1) relative to MbQ
+ getAffNeighbourPXLumaNB(MbQ, xQ , yQ - 1, &pixP);
+ // bit indices into cbp_blk[0] for the block on each side
+ blkQ = (short) ((yQ & 0xFFFC) + (xQ >> 2));
+ blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2));
+
+ MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+ p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field);
+
+ if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) )
+ {
+ Strength[idx] = (edge == 0 && (((!MbP->mb_field && !MbQ->mb_field)))) ? 4 : 3;
+ }
+ else
+ {
+ // Start with Strength=3. or Strength=4 for Mb-edge
+ Strength[idx] = (edge == 0 && (((!MbP->mb_field && !MbQ->mb_field)))) ? 4 : 3;
+
+ // the value above is kept when either macroblock is one of the listed intra types
+ if( !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM)
+ && !(MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM) )
+ {
+ if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) )
+ Strength[idx] = 2 ;
+ else
+ {
+ // if no coefs, but vector difference >= 1 set Strength=1
+ // if this is a mixed mode edge then one set of reference pictures will be frame and the
+ // other will be field
+ if (p_Vid->mixedModeEdgeFlag)
+ {
+ (Strength[idx] = 1);
+ }
+ else
+ {
+ get_mb_block_pos_mbaff(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+ // (blk_x, blk_y): motion-array indices derived from MbQ; (blk_x2, blk_y2): from the neighbour position
+ blk_y = (short) ((mb_y<<2) + (blkQ >> 2));
+ blk_x = (short) ((mb_x<<2) + (blkQ & 3));
+ blk_y2 = (short) (pixP.pos_y >> 2);
+ blk_x2 = (short) (pixP.pos_x >> 2);
+ {
+ PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+ motion_p0=&motion0[blk_y ][blk_x ];
+ motion_q0=&motion0[blk_y2][blk_x2];
+ motion_p1=&motion1[blk_y ][blk_x ];
+ motion_q1=&motion1[blk_y2][blk_x2];
+
+ // a negative ref_idx is mapped to UNDEFINED_REFERENCE
+ ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+ ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+ ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+ ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+ // both sides use the same pair of references, in the same or in swapped list order
+ if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) ||
+ ((ref_p0==ref_q1) && (ref_p1==ref_q0)))
+ {
+ // overwritten by every branch below
+ Strength[idx]=0;
+ // L0 and L1 reference pictures of p0 are different; q0 as well
+ if (ref_p0 != ref_p1)
+ {
+ // compare MV for the same reference picture
+ // (unlike the non-MBAFF functions, UNDEFINED_REFERENCE gets no special case here)
+ if (ref_p0==ref_q0)
+ {
+ Strength[idx] = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ else
+ {
+ // references match in swapped list order: compare L0 against L1
+ Strength[idx] = (byte) (
+ (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ { // L0 and L1 reference pictures of p0 are the same; q0 as well
+
+ // result is 1 only when both the straight and the swapped pairing show a difference
+ Strength[idx] = (byte) (
+ ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+ &&
+ ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+ }
+ }
+ else
+ {
+ // the two sides reference different pictures
+ Strength[idx] = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+// Returns non-zero when two motion entries differ by at least 4 in mv[0] or by
+// at least mvlimit in mv[1].
+static __forceinline int MotionVectorsDiffer(const PicMotion *a, const PicMotion *b, int mvlimit)
+{
+  return (abs( a->mv[0] - b->mv[0]) >= 4) || (abs( a->mv[1] - b->mv[1]) >= mvlimit);
+}
+
+// Returns 0 or 1 for the pair of motion entries at motion0[0]/motion1[0] (one
+// side) and motion0[motion_stride]/motion1[motion_stride] (other side), where
+// motion0 and motion1 point into the two list arrays passed by the caller.
+// The result is 1 when the two sides use different reference pictures, and
+// otherwise 1 only when the motion vectors paired by reference differ by at
+// least 4 (mv[0]) or mvlimit (mv[1]).
+static __forceinline uint8_t GetMotionStrength(PicMotion *motion0, PicMotion *motion1, int motion_stride, int mvlimit)
+{
+  PicMotion *near0 = &motion0[0];
+  PicMotion *near1 = &motion1[0];
+  PicMotion *far0 = &motion0[motion_stride];
+  PicMotion *far1 = &motion1[motion_stride];
+  h264_ref_t id_near0, id_near1, id_far0, id_far1;
+
+  // a negative ref_idx is mapped to UNDEFINED_REFERENCE
+  id_near0 = near0->ref_idx < 0 ? UNDEFINED_REFERENCE : near0->ref_pic_id;
+  id_near1 = near1->ref_idx < 0 ? UNDEFINED_REFERENCE : near1->ref_pic_id;
+  id_far0 = far0->ref_idx < 0 ? UNDEFINED_REFERENCE : far0->ref_pic_id;
+  id_far1 = far1->ref_idx < 0 ? UNDEFINED_REFERENCE : far1->ref_pic_id;
+
+  if (id_near0 == id_far0 && id_near1 == id_far1)
+  {
+    if (id_near0 == id_near1)
+    {
+      // same picture in both lists on both sides: both the straight and the
+      // crossed pairing have to show a difference
+      return (byte) (
+        (MotionVectorsDiffer(near0, far0, mvlimit) || MotionVectorsDiffer(near1, far1, mvlimit))
+        &&
+        (MotionVectorsDiffer(near0, far1, mvlimit) || MotionVectorsDiffer(near1, far0, mvlimit)));
+    }
+
+    // compare MV for the same reference picture
+    if (id_near0 == UNDEFINED_REFERENCE)
+      return (byte) MotionVectorsDiffer(near1, far1, mvlimit);
+
+    if (id_near1 == UNDEFINED_REFERENCE)
+      return (byte) MotionVectorsDiffer(near0, far0, mvlimit);
+
+    return (byte) (MotionVectorsDiffer(near0, far0, mvlimit) || MotionVectorsDiffer(near1, far1, mvlimit));
+  }
+
+  if (id_near0 == id_far1 && id_near1 == id_far0)
+  {
+    // references match with the lists swapped: compare crosswise
+    return (byte) (MotionVectorsDiffer(near0, far1, mvlimit) || MotionVectorsDiffer(near1, far0, mvlimit));
+  }
+
+  return 1;
+}
+
+
+// Computes the four entries of one edge row. An entry is 2 when the matching
+// bit (0..3) of cbp_bits is set; otherwise it is the GetMotionStrength() result
+// for that column. motion0/motion1 point at the list entries on the first side
+// of the edge; the other side is motion_stride entries further on.
+static __forceinline void FillEdgeRow_Horiz(uint8_t row[4], int cbp_bits, PicMotion *motion0, PicMotion *motion1, int motion_stride, int mvlimit)
+{
+  int col;
+
+  if (cbp_bits == 0xF)
+  {
+    memset(row, 2, 4);
+    return;
+  }
+
+  for (col = 0; col < 4; col++)
+  {
+    if (cbp_bits & (1 << col))
+      row[col] = 2;
+    else
+      row[col] = GetMotionStrength(&motion0[col], &motion1[col], motion_stride, mvlimit);
+  }
+}
+
+// Computes all four "dir == 1" edge rows of macroblock MbQ in one call.
+//   Strength[e][c] : value for edge e (0..3), column c (0..3)
+//   MbP            : macroblock on the other side of edge 0, or NULL, in which
+//                    case Strength[0] is all zero
+//   pos_x, pos_y   : indices of MbQ's first entry in the p->motion arrays
+//   luma_transform_size_8x8_flag : when non-zero, edges 1 and 3 are all zero
+//   mvlimit        : threshold for mv[1] differences (mv[0] always uses 4)
+// Row values: 4/3 for SP/SI slices and the mb_types selected by the mask below,
+// 2 where a cbp_blk[0] bit is set on either side, otherwise GetMotionStrength().
+// Rows are filled with memset instead of the original *(int32_t *) stores,
+// which accessed a uint8_t array through an incompatible, possibly misaligned
+// pointer type; the bytes written are the same.
+// NOTE(review): stepping motion0/motion1 by p->size_x>>2 assumes consecutive
+// rows of the motion arrays are contiguous with that stride - confirm against
+// the allocator.
+void GetStrength_Horiz_YUV420_All(uint8_t Strength[4][4], Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag)
+{
+  // dir == 1
+  // 26112 == 0x6600 selects mb_type values 9, 10, 13 and 14 - presumably
+  // I4MB, I16MB, I8MB and IPCM, the types tested explicitly by the sibling
+  // functions; confirm against the mb_type enum.
+  const int intra_type_mask = 26112;
+  // value used on edge 0 when the edge borders such a macroblock
+  const int mb_edge_value = (p->structure==FRAME) ? 4 : 3;
+
+  assert(NUM_SLICE_TYPES == 5); // the next line assumes this
+  if ((p->slice_type>=SP_SLICE) //(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) )
+    || ((1 << MbQ->mb_type) & intra_type_mask))
+  {
+    // Set strength to either 3 or 4 regardless of pixel position
+    memset(Strength[0], MbP ? mb_edge_value : 0, 4);
+    memset(Strength[1], luma_transform_size_8x8_flag ? 0 : 3, 4);
+    memset(Strength[2], 3, 4);
+    memset(Strength[3], luma_transform_size_8x8_flag ? 0 : 3, 4);
+  }
+  else
+  {
+    PicMotionParams *motion = &p->motion;
+    int motion_stride = p->size_x>>2;
+    // start one row above MbQ when there is a neighbour across edge 0
+    PicMotion *motion0 = &motion->motion[LIST_0][pos_y-!!MbP][pos_x];
+    PicMotion *motion1 = &motion->motion[LIST_1][pos_y-!!MbP][pos_x];
+
+    int cbp_q = (int)MbQ->cbp_blk[0];
+    // bit n is set when bit n or bit n+4 of cbp_q is set, i.e. the blocks on
+    // both sides of an internal edge are merged into one flag
+    int cbp_rows = cbp_q | (cbp_q >> 4);
+
+    // edge 0
+    if (!MbP)
+    {
+      memset(Strength[0], 0, 4);
+    }
+    else
+    {
+      if ((1 << MbP->mb_type) & intra_type_mask)
+      {
+        memset(Strength[0], mb_edge_value, 4);
+      }
+      else
+      {
+        // bits 12..15 of MbP pair with bits 0..3 of MbQ
+        int cbp_p = (int)MbP->cbp_blk[0];
+        FillEdgeRow_Horiz(Strength[0], ((cbp_p >> 12) & 0xF) | (cbp_q & 0xF), motion0, motion1, motion_stride, mvlimit);
+      }
+      // move from the neighbour's row to MbQ's first row
+      motion0 += motion_stride;
+      motion1 += motion_stride;
+    }
+
+    // edge 1
+    if (luma_transform_size_8x8_flag)
+      memset(Strength[1], 0, 4);
+    else
+      FillEdgeRow_Horiz(Strength[1], cbp_rows & 0xF, motion0, motion1, motion_stride, mvlimit);
+
+    motion0 += motion_stride;
+    motion1 += motion_stride;
+    // edge 2 (mask first, then shift, so the shifted value is never negative)
+    FillEdgeRow_Horiz(Strength[2], (cbp_rows & 0xF0) >> 4, motion0, motion1, motion_stride, mvlimit);
+
+    motion0 += motion_stride;
+    motion1 += motion_stride;
+    // edge 3
+    if (luma_transform_size_8x8_flag)
+      memset(Strength[3], 0, 4);
+    else
+      FillEdgeRow_Horiz(Strength[3], (cbp_rows & 0xF00) >> 8, motion0, motion1, motion_stride, mvlimit);
+  }
+} \ No newline at end of file
diff --git a/Src/h264dec/ldecod/src/strength_vert.c b/Src/h264dec/ldecod/src/strength_vert.c
new file mode 100644
index 00000000..89e545d4
--- /dev/null
+++ b/Src/h264dec/ldecod/src/strength_vert.c
@@ -0,0 +1,594 @@
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+#include "loopfilter.h"
+
+// Fills Strength[0..MB_BLOCK_SIZE-1] for one edge of macroblock MbQ in the
+// "dir == 0" direction. Every run of BLOCK_SIZE entries receives one value:
+//   4 or 3 : SP/SI slice, or MbP or MbQ is I4MB/I8MB/I16MB/IPCM
+//            (4 only when edge == 0)
+//   2      : a cbp_blk[0] bit is set for either block adjoining the edge
+//   1 or 0 : derived from the reference picture ids and motion vectors stored
+//            in p->motion for the two blocks
+// mvlimit is the threshold applied to mv[1] differences; mv[0] differences are
+// always compared against 4.
+void GetStrengthNormal_Vert(byte Strength[MB_BLOCK_SIZE], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p)
+{
+ // dir == 0
+ PixelPos pixP, pixMB;
+ byte StrValue;
+ Macroblock *MbP;
+
+ if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) )
+ {
+ // Set strength to either 3 or 4 regardless of pixel position
+ StrValue = (edge == 0) ? 4 : 3;
+ memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte));
+ }
+ else
+ {
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ // coordinate inside MbQ handed to the neighbour lookup (-1 for edge 0)
+ int xQ = edge - 1;
+ int yQ = 0;
+
+ p_Vid->getNeighbourX0(MbQ, xQ, p_Vid->mb_size[IS_LUMA], &pixMB);
+ pixP = pixMB;
+ // macroblock that contains the neighbouring position
+ MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+
+ if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM))
+ {
+ PicMotionParams *motion = &p->motion;
+ h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1;
+ int blkP, blkQ, idx;
+ int blk_x, blk_x2, blk_y, blk_y2 ;
+
+ PicMotion **motion0 = motion->motion[LIST_0];
+ PicMotion **motion1 = motion->motion[LIST_1];
+ short mb_x, mb_y;
+
+ p_Vid->get_mb_block_pos (p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+ // scale macroblock coordinates by 4 to index the motion arrays
+ mb_x <<= 2;
+ mb_y <<= 2;
+
+ // xQ now equals edge
+ xQ ++;
+
+ // one iteration per group of BLOCK_SIZE entries
+ for( idx = 0 ; idx < MB_BLOCK_SIZE ; idx += BLOCK_SIZE )
+ {
+ yQ = idx;
+
+ // bit indices into cbp_blk[0] for the block on each side
+ blkQ = (yQ & 0xFFFC) + (xQ >> 2);
+ blkP = (idx & 0xFFFC) + (pixP.x >> 2);
+
+ if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) )
+ StrValue = 2;
+ else
+ {
+ // if no coefs, but vector difference >= 1 set Strength=1
+ // if this is a mixed mode edge then one set of reference pictures will be frame and the
+ // other will be field
+ PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+ // (blk_x, blk_y): motion-array indices derived from MbQ; (blk_x2, blk_y2): from the neighbour position
+ blk_y = mb_y + (blkQ >> 2);
+ blk_x = mb_x + (blkQ & 3);
+ blk_y2 = (pixMB.pos_y + idx) >> 2;
+ blk_x2 = pixMB.pos_x >> 2;
+
+ motion_p0=&motion0[blk_y ][blk_x ];
+ motion_q0=&motion0[blk_y2][blk_x2];
+ motion_p1=&motion1[blk_y ][blk_x ];
+ motion_q1=&motion1[blk_y2][blk_x2];
+ // a negative ref_idx is mapped to UNDEFINED_REFERENCE
+ ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+ ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+ ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+ ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+ // both sides use the same pair of references, in the same or in swapped list order
+ if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0)))
+ {
+ // L0 and L1 reference pictures of p0 are different; q0 as well
+ if (ref_p0 != ref_p1)
+ {
+ // compare MV for the same reference picture
+ if (ref_p0 == ref_q0)
+ {
+ if (ref_p0 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_1 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ else if (ref_p1 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_0 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ else
+ {
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ {
+ // references match in swapped list order: compare L0 against L1
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ { // L0 and L1 reference pictures of p0 are the same; q0 as well
+ // result is 1 only when both the straight and the swapped pairing show a difference
+ StrValue = (byte) (
+ ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+ &&
+ ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+ }
+ }
+ else
+ {
+ // the two sides reference different pictures
+ StrValue = 1;
+ }
+ }
+ memset(&Strength[idx], (byte) StrValue, BLOCK_SIZE * sizeof(byte));
+ }
+ }
+ else
+ {
+ // Start with Strength=3. or Strength=4 for Mb-edge
+ StrValue = (edge == 0) ? 4 : 3;
+ memset(&Strength[0], (byte) StrValue, MB_BLOCK_SIZE * sizeof(byte));
+ }
+ }
+}
+
+// Variant of the "dir == 0" strength computation that writes one value per
+// group (Strength[0..3]) and receives the neighbour position (pixMB) and the
+// neighbouring macroblock (MbP) from the caller instead of looking them up.
+// Here edge is used directly as a block column index (added to mb_x<<2 and
+// used as the starting cbp bit).
+// Values written:
+//   4 or 3 : SP/SI slice, or MbP or MbQ is I4MB/I8MB/I16MB/IPCM
+//            (4 only when edge == 0)
+//   2      : a cbp_blk[0] bit is set for either block adjoining the edge
+//   1 or 0 : derived from reference picture ids and motion vectors in p->motion
+// mvlimit is the threshold for mv[1] differences; mv[0] differences use 4.
+void GetStrength_Vert_YUV420(uint8_t Strength[4], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p, PixelPos pixMB, Macroblock *MbP)
+{
+ // dir == 0
+ int i;
+ uint8_t StrValue;
+
+ if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) )
+ {
+ // Set strength to either 3 or 4 regardless of pixel position
+ StrValue = (edge == 0) ? 4 : 3;
+ for (i=0;i<4;i++)
+ {
+ Strength[i]=StrValue;
+ }
+ }
+ else
+ {
+ VideoParameters *p_Vid = MbQ->p_Vid;
+ if (!(MbP->mb_type==I4MB||MbP->mb_type==I8MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I8MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM))
+ {
+ PicMotionParams *motion = &p->motion;
+ h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1;
+ int blkP, blkQ, idx;
+ int blk_x2, blk_y, blk_y2 ;
+
+ PicMotion **motion0 = motion->motion[LIST_0];
+ PicMotion **motion1 = motion->motion[LIST_1];
+ short mb_x, mb_y;
+ // only the low bits of cbp_blk[0] that fit in an int are kept
+ const int cbp_p=(int)MbP->cbp_blk[0], cbp_q=(int)MbQ->cbp_blk[0];
+
+ get_mb_block_pos_normal(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+ // scale macroblock coordinates by 4 to index the motion arrays
+ mb_x <<= 2;
+ mb_y <<= 2;
+
+ // from here on mb_x is the motion-array column on MbQ's side of the edge
+ mb_x += edge;
+ blkQ = edge;
+ blkP = pixMB.x >> 2;
+ blk_x2 = pixMB.pos_x >> 2;
+
+ // four iterations; the cbp bit indices advance by BLOCK_SIZE each time
+ for( idx = 0 ; idx < BLOCK_SIZE ; idx++,blkQ+=BLOCK_SIZE, blkP+=BLOCK_SIZE)
+ {
+ // _bittest is presumably the MSVC intrinsic that tests bit blkP/blkQ of the pointed-to word
+ if (_bittest(&cbp_p, blkP) || _bittest(&cbp_q, blkQ))
+ StrValue = 2;
+ else
+ {
+ // if no coefs, but vector difference >= 1 set Strength=1
+ // if this is a mixed mode edge then one set of reference pictures will be frame and the
+ // other will be field
+ PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+ // (mb_x, blk_y): motion-array indices derived from MbQ; (blk_x2, blk_y2): from the neighbour position
+ blk_y = mb_y + idx;
+ blk_y2 = (pixMB.pos_y >> 2) + idx;
+
+ motion_p0=&motion0[blk_y ][mb_x ];
+ motion_q0=&motion0[blk_y2][blk_x2];
+ motion_p1=&motion1[blk_y ][mb_x ];
+ motion_q1=&motion1[blk_y2][blk_x2];
+ // a negative ref_idx is mapped to UNDEFINED_REFERENCE
+ ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+ ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+ ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+ ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+
+ // both sides use the same pair of references, in the same or in swapped list order
+ if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || ((ref_p0==ref_q1) && (ref_p1==ref_q0)))
+ {
+ // L0 and L1 reference pictures of p0 are different; q0 as well
+ if (ref_p0 != ref_p1)
+ {
+ // compare MV for the same reference picture
+ if (ref_p0 == ref_q0)
+ {
+ if (ref_p0 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_1 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ else if (ref_p1 == UNDEFINED_REFERENCE)
+ {
+ // only the LIST_0 entries are compared
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ else
+ {
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ {
+ // references match in swapped list order: compare L0 against L1
+ StrValue = (byte) (
+ (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ { // L0 and L1 reference pictures of p0 are the same; q0 as well
+ // result is 1 only when both the straight and the swapped pairing show a difference
+ StrValue = (byte) (
+ ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+ &&
+ ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+ }
+ }
+ else
+ {
+ // the two sides reference different pictures
+ StrValue = 1;
+ }
+ }
+ Strength[idx] = StrValue;
+ }
+ }
+ else
+ {
+ // Start with Strength=3. or Strength=4 for Mb-edge
+ StrValue = (edge == 0) ? 4 : 3;
+ for (i=0;i<4;i++)
+ {
+ Strength[i]=StrValue;
+ }
+ }
+ }
+}
+
+// assumes YUV420, MB Aff
+// MBAFF variant of the "dir == 0" strength computation. Writes one value per
+// position idx = 0..15 into Strength[idx]; the neighbour of each position is
+// looked up individually with getAffNeighbourXPLuma().
+// Values written:
+//   4 or 3 : SP/SI slice, or MbQ or MbP is I4MB/I16MB/I8MB/IPCM
+//            (4 only when edge == 0)
+//   2      : a cbp_blk[0] bit is set for either adjoining block
+//   1      : MbP and MbQ differ in mb_field, or they use different references
+//   1 or 0 : otherwise, from the motion vector differences
+// Side effect: p_Vid->mixedModeEdgeFlag is overwritten on every iteration of
+// the per-position loop.
+// mvlimit is the threshold for mv[1] differences; mv[0] differences use 4.
+void GetStrength_MBAff_Vert_YUV420(byte Strength[16], Macroblock *MbQ, int edge, int mvlimit, StorablePicture *p)
+{
+ // dir == 0
+ // fast path: the result does not depend on the neighbour, so no lookup is done
+ if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE)
+ || (MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM))
+ {
+ memset(Strength,(edge == 0) ? 4 : 3, 16);
+ }
+ else
+ {
+ short blkP, blkQ, idx;
+ short blk_x, blk_x2, blk_y, blk_y2 ;
+ h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1;
+ int xQ, yQ;
+ short mb_x, mb_y;
+ Macroblock *MbP;
+
+ PixelPos pixP;
+
+ PicMotionParams *motion = &p->motion;
+ PicMotion **motion0 = motion->motion[LIST_0];
+ PicMotion **motion1 = motion->motion[LIST_1];
+ xQ = edge;
+ for( idx = 0; idx < 16; ++idx )
+ {
+ VideoParameters *p_Vid = MbQ->p_Vid;
+
+ yQ = idx;
+ // neighbour of position (xQ - 1, yQ) relative to MbQ
+ getAffNeighbourXPLuma(MbQ, xQ - 1, yQ, &pixP);
+ // bit indices into cbp_blk[0] for the block on each side
+ blkQ = (short) ((yQ & 0xC) + (xQ >> 2)); // blkQ changes once every 4 loop iterations
+ blkP = (short) ((pixP.y & 0xFFFC) + (pixP.x >> 2));
+
+ MbP = &(p_Vid->mb_data[pixP.mb_addr]);
+ p_Vid->mixedModeEdgeFlag = (byte) (MbQ->mb_field != MbP->mb_field);
+
+ // Start with Strength=3. or Strength=4 for Mb-edge
+ Strength[idx] = (edge == 0) ? 4 : 3;
+
+ // the value above is kept when MbP is one of the listed intra types
+ if( !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM))
+ {
+ if( ((MbQ->cbp_blk[0] & ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk[0] & ((int64)1 << blkP)) != 0) )
+ Strength[idx] = 2 ;
+ else
+ {
+ // if no coefs, but vector difference >= 1 set Strength=1
+ // if this is a mixed mode edge then one set of reference pictures will be frame and the
+ // other will be field
+ if (p_Vid->mixedModeEdgeFlag)
+ {
+ (Strength[idx] = 1);
+ }
+ else
+ {
+ get_mb_block_pos_mbaff(p_Vid->PicPos, MbQ->mbAddrX, &mb_x, &mb_y);
+ // (blk_x, blk_y): motion-array indices derived from MbQ; (blk_x2, blk_y2): from the neighbour position
+ blk_y = (short) ((mb_y<<2) + (blkQ >> 2));
+ blk_x = (short) ((mb_x<<2) + (blkQ & 3));
+ blk_y2 = (short) (pixP.pos_y >> 2);
+ blk_x2 = (short) (pixP.pos_x >> 2);
+ {
+ PicMotion *motion_p0, *motion_q0, *motion_p1, *motion_q1;
+ motion_p0=&motion0[blk_y ][blk_x ];
+ motion_q0=&motion0[blk_y2][blk_x2];
+ motion_p1=&motion1[blk_y ][blk_x ];
+ motion_q1=&motion1[blk_y2][blk_x2];
+
+ // a negative ref_idx is mapped to UNDEFINED_REFERENCE
+ ref_p0 = motion_p0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p0->ref_pic_id;
+ ref_q0 = motion_q0->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q0->ref_pic_id;
+ ref_p1 = motion_p1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_p1->ref_pic_id;
+ ref_q1 = motion_q1->ref_idx < 0 ? UNDEFINED_REFERENCE : motion_q1->ref_pic_id;
+
+ // both sides use the same pair of references, in the same or in swapped list order
+ if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) ||
+ ((ref_p0==ref_q1) && (ref_p1==ref_q0)))
+ {
+ // overwritten by every branch below
+ Strength[idx]=0;
+ // L0 and L1 reference pictures of p0 are different; q0 as well
+ if (ref_p0 != ref_p1)
+ {
+ // compare MV for the same reference picture
+ // (unlike the non-MBAFF functions, UNDEFINED_REFERENCE gets no special case here)
+ if (ref_p0==ref_q0)
+ {
+ Strength[idx] = (byte) (
+ (abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit));
+ }
+ else
+ {
+ // references match in swapped list order: compare L0 against L1
+ Strength[idx] = (byte) (
+ (abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit));
+ }
+ }
+ else
+ { // L0 and L1 reference pictures of p0 are the same; q0 as well
+
+ // result is 1 only when both the straight and the swapped pairing show a difference
+ Strength[idx] = (byte) (
+ ((abs( motion_p0->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q0->mv[1]) >= mvlimit ) ||
+ (abs( motion_p1->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q1->mv[1]) >= mvlimit))
+ &&
+ ((abs( motion_p0->mv[0] - motion_q1->mv[0]) >= 4) ||
+ (abs( motion_p0->mv[1] - motion_q1->mv[1]) >= mvlimit) ||
+ (abs( motion_p1->mv[0] - motion_q0->mv[0]) >= 4) ||
+ (abs( motion_p1->mv[1] - motion_q0->mv[1]) >= mvlimit)));
+ }
+ }
+ else
+ {
+ // the two sides reference different pictures
+ Strength[idx] = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static __forceinline uint8_t GetMotionStrength(PicMotion *motion0, PicMotion *motion1, int mvlimit) // Motion-only edge strength (0 or 1) between block p = element [0] and block q = element [1] of each array.
+{
+ uint8_t StrValue;
+ h264_ref_t ref_p0,ref_p1,ref_q0,ref_q1;
+ // A negative ref_idx maps to UNDEFINED_REFERENCE, otherwise blocks are matched by ref_pic_id. The caller in this file passes LIST_0 entries as motion0 and LIST_1 entries as motion1.
+ ref_p0 = motion0[0].ref_idx < 0 ? UNDEFINED_REFERENCE : motion0[0].ref_pic_id;
+ ref_p1 = motion1[0].ref_idx < 0 ? UNDEFINED_REFERENCE : motion1[0].ref_pic_id;
+ ref_q0 = motion0[1].ref_idx < 0 ? UNDEFINED_REFERENCE : motion0[1].ref_pic_id;
+ ref_q1 = motion1[1].ref_idx < 0 ? UNDEFINED_REFERENCE : motion1[1].ref_pic_id;
+
+ if (ref_p0==ref_q0 && ref_p1==ref_q1) // p and q use the same reference in each list
+ {
+ if (ref_p0 != ref_p1)
+ {
+ // compare MV for the same reference picture
+ if (ref_p0 == UNDEFINED_REFERENCE) // first list unused on both sides: only the motion1 vectors are compared
+ {
+ StrValue = (byte) (
+ (abs( motion1[0].mv[0] - motion1[1].mv[0]) >= 4) ||
+ (abs( motion1[0].mv[1] - motion1[1].mv[1]) >= mvlimit));
+ }
+ else if (ref_p1 == UNDEFINED_REFERENCE) // second list unused on both sides: only the motion0 vectors are compared
+ {
+ StrValue = (byte) (
+ (abs( motion0[0].mv[0] - motion0[1].mv[0]) >= 4) ||
+ (abs( motion0[0].mv[1] - motion0[1].mv[1]) >= mvlimit));
+ }
+ else // both lists in use: any component over its limit gives strength 1
+ {
+ StrValue = (byte) (
+ (abs( motion0[0].mv[0] - motion0[1].mv[0]) >= 4) ||
+ (abs( motion0[0].mv[1] - motion0[1].mv[1]) >= mvlimit) ||
+ (abs( motion1[0].mv[0] - motion1[1].mv[0]) >= 4) ||
+ (abs( motion1[0].mv[1] - motion1[1].mv[1]) >= mvlimit));
+ }
+ }
+ else
+ { // L0 and L1 reference pictures of p0 are the same; q0 as well
+ StrValue = (byte) (
+ ((abs( motion0[0].mv[0] - motion0[1].mv[0]) >= 4) ||
+ (abs( motion0[0].mv[1] - motion0[1].mv[1]) >= mvlimit ) ||
+ (abs( motion1[0].mv[0] - motion1[1].mv[0]) >= 4) ||
+ (abs( motion1[0].mv[1] - motion1[1].mv[1]) >= mvlimit))
+ && // strength 1 only if BOTH the straight pairing (above) and the crossed pairing (below) exceed a limit
+ ((abs( motion0[0].mv[0] - motion1[1].mv[0]) >= 4) ||
+ (abs( motion0[0].mv[1] - motion1[1].mv[1]) >= mvlimit) ||
+ (abs( motion1[0].mv[0] - motion0[1].mv[0]) >= 4) ||
+ (abs( motion1[0].mv[1] - motion0[1].mv[1]) >= mvlimit)));
+ }
+ }
+ else if (ref_p0==ref_q1 && ref_p1==ref_q0) // references match crosswise: compare p's motion0 vector with q's motion1 vector and vice versa
+ {
+ StrValue = (byte) (
+ (abs( motion0[0].mv[0] - motion1[1].mv[0]) >= 4) ||
+ (abs( motion0[0].mv[1] - motion1[1].mv[1]) >= mvlimit) ||
+ (abs( motion1[0].mv[0] - motion0[1].mv[0]) >= 4) ||
+ (abs( motion1[0].mv[1] - motion0[1].mv[1]) >= mvlimit));
+ }
+ else
+ {
+ StrValue = 1; // p and q reference different pictures
+ }
+ return StrValue;
+}
+
+void GetStrength_Vert_YUV420_All(uint8_t Strength[4][4], Macroblock *MbQ, int mvlimit, StorablePicture *p, int pos_x, int pos_y, Macroblock *MbP, int luma_transform_size_8x8_flag) // Fills Strength[edge][row] for the four vertical edges of macroblock MbQ, four entries per edge.
+{
+ // dir == 0. MbP may be NULL, in which case edge 0 gets strength 0. pos_x/pos_y index p->motion (presumably in 4x4-block units, given size_x >> 2 below - TODO confirm).
+ if ((p->slice_type>=SP_SLICE) //(p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) )
+ || ((1 << MbQ->mb_type) & 26112)) // 26112 == 0x6600, i.e. mb_type values 9, 10, 13, 14; presumably the I4MB/I16MB/I8MB/IPCM set tested by name in the MBAFF variant - TODO confirm against the defines
+ {
+ // Edge 0 gets 4 (0 without MbP); inner edges get 3, except that edges 1 and 3 are 0 when the 8x8 transform is used
+ *(int32_t *)(Strength[0]) = MbP?0x04040404:0; // writes all four uint8_t entries of the row with one 32-bit store
+ *(int32_t *)(Strength[1]) = luma_transform_size_8x8_flag?0:0x03030303;
+ *(int32_t *)(Strength[2]) = 0x03030303;
+ *(int32_t *)(Strength[3]) = luma_transform_size_8x8_flag?0:0x03030303;
+ }
+ else
+ {
+ PicMotionParams *motion = &p->motion;
+ int motion_stride = p->size_x >> 2; // NOTE(review): used below as the distance between consecutive rows of motion[][]; assumes the rows are contiguous in memory - confirm against the allocator
+ PicMotion *motion0 = &motion->motion[LIST_0][pos_y][pos_x];
+ PicMotion *motion1 = &motion->motion[LIST_1][pos_y][pos_x];
+ int cbp_q=(int)MbQ->cbp_blk[0]; // bitmap tested below with bit index 4*row + column
+
+ // edge 0
+ if (!MbP)
+ {
+ *(int32_t *)(Strength[0]) = 0;
+ }
+ else if ((1 << MbP->mb_type) & 26112) // same mb_type mask as above, applied to MbP
+ {
+ *(int32_t *)(Strength[0]) = 0x04040404;
+ }
+ else
+ {
+ int cbp_p = (int)MbP->cbp_blk[0];
+ if( ((cbp_q & (1 << 0 )) != 0) || ((cbp_p & (1 << (3))) != 0) ) // column 0 of MbQ or column 3 of MbP, same row
+ Strength[0][0] = 2;
+ else
+ Strength[0][0] = GetMotionStrength(&motion0[0-1], &motion1[0-1], mvlimit); // pair = entries at pos_x-1 (p) and pos_x (q)
+
+ if( ((cbp_q & (1 << 4 )) != 0) || ((cbp_p & (1 << (4 + 3))) != 0) )
+ Strength[0][1] = 2;
+ else
+ Strength[0][1] = GetMotionStrength(&motion0[motion_stride-1], &motion1[motion_stride-1], mvlimit);
+
+ if( ((cbp_q & (1 << 8 )) != 0) || ((cbp_p & (1 << (8 + 3))) != 0) )
+ Strength[0][2] = 2;
+ else
+ Strength[0][2] = GetMotionStrength(&motion0[2*motion_stride-1], &motion1[2*motion_stride-1], mvlimit);
+
+ if( ((cbp_q & (1 << 12 )) != 0) || ((cbp_p & (1 << (12 + 3))) != 0) )
+ Strength[0][3] = 2;
+ else
+ Strength[0][3] = GetMotionStrength(&motion0[3*motion_stride-1], &motion1[3*motion_stride-1], mvlimit);
+ }
+
+ // edge 1
+ if (luma_transform_size_8x8_flag)
+ {
+ *(int32_t *)(Strength[1]) = 0;
+ }
+ else
+ {
+ if (cbp_q & (3 << 0)) // columns 0 and 1 of the row
+ Strength[1][0] = 2;
+ else
+ Strength[1][0] = GetMotionStrength(&motion0[0], &motion1[0], mvlimit);
+
+ if (cbp_q & (3 << 4))
+ Strength[1][1] = 2;
+ else
+ Strength[1][1] = GetMotionStrength(&motion0[1*motion_stride], &motion1[1*motion_stride], mvlimit);
+
+ if (cbp_q & (3 << 8))
+ Strength[1][2] = 2;
+ else
+ Strength[1][2] = GetMotionStrength(&motion0[2*motion_stride], &motion1[2*motion_stride], mvlimit);
+
+ if (cbp_q & (3 << 12))
+ Strength[1][3] = 2;
+ else
+ Strength[1][3] = GetMotionStrength(&motion0[3*motion_stride], &motion1[3*motion_stride], mvlimit);
+ }
+
+ // edge 2
+ if (cbp_q & (6 << 0)) // columns 1 and 2 of the row
+ Strength[2][0] = 2;
+ else
+ Strength[2][0] = GetMotionStrength(&motion0[1], &motion1[1], mvlimit);
+
+ if (cbp_q & (6 << 4))
+ Strength[2][1] = 2;
+ else
+ Strength[2][1] = GetMotionStrength(&motion0[motion_stride+1], &motion1[motion_stride+1], mvlimit);
+
+ if (cbp_q & (6 << 8))
+ Strength[2][2] = 2;
+ else
+ Strength[2][2] = GetMotionStrength(&motion0[2*motion_stride+1], &motion1[2*motion_stride+1], mvlimit);
+
+ if (cbp_q & (6 << 12))
+ Strength[2][3] = 2;
+ else
+ Strength[2][3] = GetMotionStrength(&motion0[3*motion_stride+1], &motion1[3*motion_stride+1], mvlimit);
+
+ // edge 3
+ if (luma_transform_size_8x8_flag)
+ {
+ *(int32_t *)(Strength[3]) = 0;
+ }
+ else
+ {
+ if (cbp_q & (0xC << 0)) // columns 2 and 3 of the row
+ Strength[3][0] = 2;
+ else
+ Strength[3][0] = GetMotionStrength(&motion0[2], &motion1[2], mvlimit);
+
+ if (cbp_q & (0xC << 4))
+ Strength[3][1] = 2;
+ else
+ Strength[3][1] = GetMotionStrength(&motion0[motion_stride+2], &motion1[motion_stride+2], mvlimit);
+
+ if (cbp_q & (0xC << 8))
+ Strength[3][2] = 2;
+ else
+ Strength[3][2] = GetMotionStrength(&motion0[2*motion_stride+2], &motion1[2*motion_stride+2], mvlimit);
+
+ if (cbp_q & (0xC << 12))
+ Strength[3][3] = 2;
+ else
+ Strength[3][3] = GetMotionStrength(&motion0[3*motion_stride+2], &motion1[3*motion_stride+2], mvlimit);
+ }
+ }
+}
diff --git a/Src/h264dec/ldecod/src/transform8x8.c b/Src/h264dec/ldecod/src/transform8x8.c
new file mode 100644
index 00000000..1072a6d1
--- /dev/null
+++ b/Src/h264dec/ldecod/src/transform8x8.c
@@ -0,0 +1,696 @@
+
+/*!
+***************************************************************************
+* \file transform8x8.c
+*
+* \brief
+* 8x8 transform functions
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Yuri Vatis
+* - Jan Muenster
+*
+* \date
+* 12. October 2003
+**************************************************************************
+*/
+
+#include "global.h"
+
+#include "image.h"
+#include "mb_access.h"
+#include "elements.h"
+#include "transform8x8.h"
+#include "transform.h"
+#include "quant.h"
+#include <emmintrin.h>
+
+static void inverse8x8_sse2(h264_short_8x8block_row_t *block) // In-place 2-D inverse 8x8 integer transform of block using SSE2; same butterfly as the scalar inverse8x8, but all intermediates stay in wrapping 16-bit lanes.
+{
+ __m128i a0, a1, a2, a3;
+ __m128i p0, p1, p2, p3, p4, p5 ,p6, p7;
+ __m128i b0, b1, b2, b3, b4, b5, b6, b7;
+ __m128i r0, r1, r2, r3, r4, r5, r6, r7;
+
+ // Horizontal (aligned loads: each row of block must be 16-byte aligned)
+ b0 = _mm_load_si128((__m128i *)(block[0]));
+ b1 = _mm_load_si128((__m128i *)(block[1]));
+ b2 = _mm_load_si128((__m128i *)(block[2]));
+ b3 = _mm_load_si128((__m128i *)(block[3]));
+ b4 = _mm_load_si128((__m128i *)(block[4]));
+ b5 = _mm_load_si128((__m128i *)(block[5]));
+ b6 = _mm_load_si128((__m128i *)(block[6]));
+ b7 = _mm_load_si128((__m128i *)(block[7]));
+
+ /* transpose 8x8 (ugh): afterwards p0..p7 hold columns 0..7, one lane per row */
+ r0 = _mm_unpacklo_epi16(b0, b2);
+ r1 = _mm_unpacklo_epi16(b1, b3);
+ r2 = _mm_unpackhi_epi16(b0, b2);
+ r3 = _mm_unpackhi_epi16(b1, b3);
+ r4 = _mm_unpacklo_epi16(b4, b6);
+ r5 = _mm_unpacklo_epi16(b5, b7);
+ r6 = _mm_unpackhi_epi16(b4, b6);
+ r7 = _mm_unpackhi_epi16(b5, b7);
+
+ b0 = _mm_unpacklo_epi16(r0, r1);
+ b1 = _mm_unpackhi_epi16(r0, r1);
+ b2 = _mm_unpacklo_epi16(r2, r3);
+ b3 = _mm_unpackhi_epi16(r2, r3);
+ b4 = _mm_unpacklo_epi16(r4, r5);
+ b5 = _mm_unpackhi_epi16(r4, r5);
+ b6 = _mm_unpacklo_epi16(r6, r7);
+ b7 = _mm_unpackhi_epi16(r6, r7);
+
+ p0 = _mm_unpacklo_epi64(b0, b4);
+ p1 = _mm_unpackhi_epi64(b0, b4);
+ p2 = _mm_unpacklo_epi64(b1, b5);
+ p3 = _mm_unpackhi_epi64(b1, b5);
+ p4 = _mm_unpacklo_epi64(b2, b6);
+ p5 = _mm_unpackhi_epi64(b2, b6);
+ p6 = _mm_unpacklo_epi64(b3, b7);
+ p7 = _mm_unpackhi_epi64(b3, b7);
+
+ /* 1-D inverse transform (approx inverse DCT) along each row, all 8 rows at once */
+ a0 = _mm_add_epi16(p0, p4); // p0 + p4
+ a1 = _mm_sub_epi16(p0, p4); // p0 - p4
+ r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
+ a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
+ r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
+ a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)
+
+ b0 = _mm_add_epi16(a0, a3); // a0 + a3;
+ b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
+ b4 = _mm_add_epi16(a1, a2); // a1 + a2;
+ b6 = _mm_sub_epi16(a0, a3); // a0 - a3;
+
+ //-p3 + p5 - p7 - (p7 >> 1);
+ r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
+ a0 = _mm_sub_epi16(p5, p3); // p5 - p3
+ a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
+ a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)
+
+ //p1 + p7 - p3 - (p3 >> 1);
+ r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
+ a1 = _mm_add_epi16(p1, p7); // p1 + p7
+ a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
+ a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)
+
+ // -p1 + p7 + p5 + (p5 >> 1);
+ r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
+ a2 = _mm_sub_epi16(p7, p1); // p7 - p1
+ a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
+ a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)
+
+ // p3 + p5 + p1 + (p1 >> 1);
+ a3 = _mm_add_epi16(p3, p5); // p3+p5
+ a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
+ p1 = _mm_srai_epi16(p1, 1); // p1 >> 1 (overwrites p1; the original p1 is not read again in this pass)
+ a3 = _mm_add_epi16(a3, p1); //p3 + p5 + p1 + (p1 >> 1)
+
+ r0 = _mm_srai_epi16(a3, 2); // a3>>2
+ b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
+ r0 = _mm_srai_epi16(a2, 2); // a2>>2
+ b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
+ a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
+ b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
+ a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
+ b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);
+
+ p0 = _mm_add_epi16(b0, b7); // b0 + b7;
+ p1 = _mm_sub_epi16(b2, b5); // b2 - b5;
+ p2 = _mm_add_epi16(b4, b3); // b4 + b3;
+ p3 = _mm_add_epi16(b6, b1); // b6 + b1;
+ p4 = _mm_sub_epi16(b6, b1); // b6 - b1;
+ p5 = _mm_sub_epi16(b4, b3); // b4 - b3;
+ p6 = _mm_add_epi16(b2, b5); // b2 + b5;
+ p7 = _mm_sub_epi16(b0, b7); // b0 - b7;
+
+ /* transpose 8x8 back (ugh): p0..p7 now hold rows 0..7 of the row-transformed block */
+ r0 = _mm_unpacklo_epi16(p0, p2);
+ r1 = _mm_unpacklo_epi16(p1, p3);
+ r2 = _mm_unpackhi_epi16(p0, p2);
+ r3 = _mm_unpackhi_epi16(p1, p3);
+ r4 = _mm_unpacklo_epi16(p4, p6);
+ r5 = _mm_unpacklo_epi16(p5, p7);
+ r6 = _mm_unpackhi_epi16(p4, p6);
+ r7 = _mm_unpackhi_epi16(p5, p7);
+
+ b0 = _mm_unpacklo_epi16(r0, r1);
+ b1 = _mm_unpackhi_epi16(r0, r1);
+ b2 = _mm_unpacklo_epi16(r2, r3);
+ b3 = _mm_unpackhi_epi16(r2, r3);
+ b4 = _mm_unpacklo_epi16(r4, r5);
+ b5 = _mm_unpackhi_epi16(r4, r5);
+ b6 = _mm_unpacklo_epi16(r6, r7);
+ b7 = _mm_unpackhi_epi16(r6, r7);
+
+ p0 = _mm_unpacklo_epi64(b0, b4);
+ p1 = _mm_unpackhi_epi64(b0, b4);
+ p2 = _mm_unpacklo_epi64(b1, b5);
+ p3 = _mm_unpackhi_epi64(b1, b5);
+ p4 = _mm_unpacklo_epi64(b2, b6);
+ p5 = _mm_unpackhi_epi64(b2, b6);
+ p6 = _mm_unpacklo_epi64(b3, b7);
+ p7 = _mm_unpackhi_epi64(b3, b7);
+
+
+ /* Vertical: the same 1-D butterfly applied down the columns */
+
+ a0 = _mm_add_epi16(p0, p4); // p0 + p4
+ a1 = _mm_sub_epi16(p0, p4); // p0 - p4
+ r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
+ a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
+ r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
+ a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)
+
+ b0 = _mm_add_epi16(a0, a3); // a0 + a3;
+ b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
+ b4 = _mm_add_epi16(a1, a2); // a1 + a2;
+ b6 = _mm_sub_epi16(a0, a3); // a0 - a3;
+
+ //-p3 + p5 - p7 - (p7 >> 1);
+ r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
+ a0 = _mm_sub_epi16(p5, p3); // p5 - p3
+ a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
+ a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)
+
+ //p1 + p7 - p3 - (p3 >> 1);
+ r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
+ a1 = _mm_add_epi16(p1, p7); // p1 + p7
+ a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
+ a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)
+
+ // -p1 + p7 + p5 + (p5 >> 1);
+ r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
+ a2 = _mm_sub_epi16(p7, p1); // p7 - p1
+ a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
+ a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)
+
+ // p3 + p5 + p1 + (p1 >> 1);
+ r0 = _mm_srai_epi16(p1, 1); // p1 >> 1
+ a3 = _mm_add_epi16(p3, p5); // p3+p5
+ a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
+ a3 = _mm_add_epi16(a3, r0); //p3 + p5 + p1 + (p1 >> 1)
+
+ r0 = _mm_srai_epi16(a3, 2); // a3>>2
+ b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
+ r0 = _mm_srai_epi16(a2, 2); // a2>>2
+ b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
+ a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
+ b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
+ a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
+ b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);
+
+ r0 = _mm_add_epi16(b0, b7); // b0 + b7;
+ _mm_store_si128((__m128i *)(block[0]), r0); // results overwrite the input rows; no rounding or final shift is applied here
+ r1 = _mm_sub_epi16(b2, b5); // b2 - b5;
+ _mm_store_si128((__m128i *)(block[1]), r1);
+ r2 = _mm_add_epi16(b4, b3); // b4 + b3;
+ _mm_store_si128((__m128i *)(block[2]), r2);
+ r3 = _mm_add_epi16(b6, b1); // b6 + b1;
+ _mm_store_si128((__m128i *)(block[3]), r3);
+ r4 = _mm_sub_epi16(b6, b1); // b6 - b1;
+ _mm_store_si128((__m128i *)(block[4]), r4);
+ r5 = _mm_sub_epi16(b4, b3); // b4 - b3;
+ _mm_store_si128((__m128i *)(block[5]), r5);
+ r6 = _mm_add_epi16(b2, b5); // b2 + b5;
+ _mm_store_si128((__m128i *)(block[6]), r6);
+ r7 = _mm_sub_epi16(b0, b7); // b0 - b7;
+ _mm_store_si128((__m128i *)(block[7]), r7);
+}
+
+static void inverse8x8(h264_short_8x8block_row_t *block) // Scalar in-place 2-D inverse 8x8 integer transform: a row pass followed by a column pass; no rounding or final shift is applied here.
+{
+ int i;
+
+ // Intermediates are computed in int and written back into block after each 1-D pass.
+ // p0..p7 are the eight inputs of the current row/column; a0..a3 and b0..b7 are butterfly temporaries.
+ int a0, a1, a2, a3;
+ int p0, p1, p2, p3, p4, p5 ,p6, p7;
+ int b0, b1, b2, b3, b4, b5, b6, b7;
+
+ // Horizontal
+ for (i=0; i < BLOCK_SIZE_8x8; i++)
+ {
+ p0 = block[i][0];
+ p1 = block[i][1];
+ p2 = block[i][2];
+ p3 = block[i][3];
+ p4 = block[i][4];
+ p5 = block[i][5];
+ p6 = block[i][6];
+ p7 = block[i][7];
+
+ a0 = p0 + p4; // even part (inputs 0, 2, 4, 6)
+ a1 = p0 - p4;
+ a2 = p6 - (p2 >> 1);
+ a3 = p2 + (p6 >> 1);
+
+ b0 = a0 + a3;
+ b2 = a1 - a2;
+ b4 = a1 + a2;
+ b6 = a0 - a3;
+
+ a0 = p5 - p3 - p7 - (p7 >> 1); // odd part (inputs 1, 3, 5, 7)
+ a1 = p1 + p7 - p3 - (p3 >> 1);
+ a2 = p7 - p1 + p5 + (p5 >> 1);
+ a3 = p3 + p5 + p1 + (p1 >> 1);
+
+
+ b1 = a0 + (a3>>2);
+ b3 = a1 + (a2>>2);
+ b5 = a2 - (a1>>2);
+ b7 = a3 - (a0>>2);
+
+ block[i][0] = b0 + b7; // outputs k and 7-k are the sum/difference of one even and one odd term
+ block[i][1] = b2 - b5;
+ block[i][2] = b4 + b3;
+ block[i][3] = b6 + b1;
+ block[i][4] = b6 - b1;
+ block[i][5] = b4 - b3;
+ block[i][6] = b2 + b5;
+ block[i][7] = b0 - b7;
+ }
+
+ // Vertical
+ for (i=0; i < BLOCK_SIZE_8x8; i++)
+ {
+ // column i of the row-transformed block
+ p0 = block[0][i];
+ p1 = block[1][i];
+ p2 = block[2][i];
+ p3 = block[3][i];
+ p4 = block[4][i];
+ p5 = block[5][i];
+ p6 = block[6][i];
+ p7 = block[7][i];
+
+ a0 = p0 + p4;
+ a1 = p0 - p4;
+ a2 = p6 - (p2>>1);
+ a3 = p2 + (p6>>1);
+
+ b0 = a0 + a3;
+ b2 = a1 - a2;
+ b4 = a1 + a2;
+ b6 = a0 - a3;
+
+ a0 = -p3 + p5 - p7 - (p7 >> 1);
+ a1 = p1 + p7 - p3 - (p3 >> 1);
+ a2 = -p1 + p7 + p5 + (p5 >> 1);
+ a3 = p3 + p5 + p1 + (p1 >> 1);
+
+
+ b1 = a0 + (a3 >> 2);
+ b7 = a3 - (a0 >> 2);
+ b3 = a1 + (a2 >> 2);
+ b5 = a2 - (a1 >> 2);
+
+ block[0][i] = b0 + b7;
+ block[1][i] = b2 - b5;
+ block[2][i] = b4 + b3;
+ block[3][i] = b6 + b1;
+ block[4][i] = b6 - b1;
+ block[5][i] = b4 - b3;
+ block[6][i] = b2 + b5;
+ block[7][i] = b0 - b7;
+ }
+
+}
+#if defined(_DEBUG) || defined(_M_IX64) // NOTE(review): _M_IX64 is not an MSVC predefined macro (x64 targets define _M_X64 / _M_AMD64); confirm whether the build defines it or whether _M_X64 was intended
+void itrans8x8_sse2(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x) // SSE2 inverse 8x8 transform + reconstruction: mb_rec[j][pos_x..pos_x+7] = saturate_u8(mb_pred[j][pos_x..] + ((residual + 32) >> 6)), j = 0..7. block is only read.
+{
+ __m128i a0, a1, a2, a3;
+ __m128i p0, p1, p2, p3, p4, p5 ,p6, p7;
+ __m128i b0, b1, b2, b3, b4, b5, b6, b7;
+ __m128i r0, r1, r2, r3, r4, r5, r6, r7;
+ __m128i const32, zero;
+ __declspec(align(32)) static const int16_t c32[8] = {32, 32, 32, 32, 32, 32, 32, 32}; // rounding term for (x + 32) >> 6
+ __m128i pred0, pred1;
+
+ const32 = _mm_load_si128((const __m128i *)c32);
+ zero = _mm_setzero_si128();
+
+ // Horizontal (aligned loads: each row of block must be 16-byte aligned)
+ b0 = _mm_load_si128((__m128i *)(block[0]));
+ b1 = _mm_load_si128((__m128i *)(block[1]));
+ b2 = _mm_load_si128((__m128i *)(block[2]));
+ b3 = _mm_load_si128((__m128i *)(block[3]));
+ b4 = _mm_load_si128((__m128i *)(block[4]));
+ b5 = _mm_load_si128((__m128i *)(block[5]));
+ b6 = _mm_load_si128((__m128i *)(block[6]));
+ b7 = _mm_load_si128((__m128i *)(block[7]));
+
+ /* transpose 8x8 (ugh): afterwards p0..p7 hold columns 0..7, one lane per row */
+ r0 = _mm_unpacklo_epi16(b0, b2);
+ r1 = _mm_unpacklo_epi16(b1, b3);
+ r2 = _mm_unpackhi_epi16(b0, b2);
+ r3 = _mm_unpackhi_epi16(b1, b3);
+ r4 = _mm_unpacklo_epi16(b4, b6);
+ r5 = _mm_unpacklo_epi16(b5, b7);
+ r6 = _mm_unpackhi_epi16(b4, b6);
+ r7 = _mm_unpackhi_epi16(b5, b7);
+
+ b0 = _mm_unpacklo_epi16(r0, r1);
+ b1 = _mm_unpackhi_epi16(r0, r1);
+ b2 = _mm_unpacklo_epi16(r2, r3);
+ b3 = _mm_unpackhi_epi16(r2, r3);
+ b4 = _mm_unpacklo_epi16(r4, r5);
+ b5 = _mm_unpackhi_epi16(r4, r5);
+ b6 = _mm_unpacklo_epi16(r6, r7);
+ b7 = _mm_unpackhi_epi16(r6, r7);
+
+ p0 = _mm_unpacklo_epi64(b0, b4);
+ p1 = _mm_unpackhi_epi64(b0, b4);
+ p2 = _mm_unpacklo_epi64(b1, b5);
+ p3 = _mm_unpackhi_epi64(b1, b5);
+ p4 = _mm_unpacklo_epi64(b2, b6);
+ p5 = _mm_unpackhi_epi64(b2, b6);
+ p6 = _mm_unpacklo_epi64(b3, b7);
+ p7 = _mm_unpackhi_epi64(b3, b7);
+
+ /* 1-D inverse transform (approx inverse DCT) along each row, all 8 rows at once */
+ a0 = _mm_add_epi16(p0, p4); // p0 + p4
+ a1 = _mm_sub_epi16(p0, p4); // p0 - p4
+ r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
+ a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
+ r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
+ a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)
+
+ b0 = _mm_add_epi16(a0, a3); // a0 + a3;
+ b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
+ b4 = _mm_add_epi16(a1, a2); // a1 + a2;
+ b6 = _mm_sub_epi16(a0, a3); // a0 - a3;
+
+ //-p3 + p5 - p7 - (p7 >> 1);
+ r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
+ a0 = _mm_sub_epi16(p5, p3); // p5 - p3
+ a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
+ a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)
+
+ //p1 + p7 - p3 - (p3 >> 1);
+ r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
+ a1 = _mm_add_epi16(p1, p7); // p1 + p7
+ a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
+ a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)
+
+ // -p1 + p7 + p5 + (p5 >> 1);
+ r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
+ a2 = _mm_sub_epi16(p7, p1); // p7 - p1
+ a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
+ a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)
+
+ // p3 + p5 + p1 + (p1 >> 1);
+ a3 = _mm_add_epi16(p3, p5); // p3+p5
+ a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
+ p1 = _mm_srai_epi16(p1, 1); // p1 >> 1 (overwrites p1; the original p1 is not read again in this pass)
+ a3 = _mm_add_epi16(a3, p1); //p3 + p5 + p1 + (p1 >> 1)
+
+ r0 = _mm_srai_epi16(a3, 2); // a3>>2
+ b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
+ r0 = _mm_srai_epi16(a2, 2); // a2>>2
+ b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
+ a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
+ b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
+ a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
+ b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);
+
+ p0 = _mm_add_epi16(b0, b7); // b0 + b7;
+ p1 = _mm_sub_epi16(b2, b5); // b2 - b5;
+ p2 = _mm_add_epi16(b4, b3); // b4 + b3;
+ p3 = _mm_add_epi16(b6, b1); // b6 + b1;
+ p4 = _mm_sub_epi16(b6, b1); // b6 - b1;
+ p5 = _mm_sub_epi16(b4, b3); // b4 - b3;
+ p6 = _mm_add_epi16(b2, b5); // b2 + b5;
+ p7 = _mm_sub_epi16(b0, b7); // b0 - b7;
+
+ /* transpose 8x8 back (ugh): p0..p7 now hold rows 0..7 of the row-transformed block */
+ r0 = _mm_unpacklo_epi16(p0, p2);
+ r1 = _mm_unpacklo_epi16(p1, p3);
+ r2 = _mm_unpackhi_epi16(p0, p2);
+ r3 = _mm_unpackhi_epi16(p1, p3);
+ r4 = _mm_unpacklo_epi16(p4, p6);
+ r5 = _mm_unpacklo_epi16(p5, p7);
+ r6 = _mm_unpackhi_epi16(p4, p6);
+ r7 = _mm_unpackhi_epi16(p5, p7);
+
+ b0 = _mm_unpacklo_epi16(r0, r1);
+ b1 = _mm_unpackhi_epi16(r0, r1);
+ b2 = _mm_unpacklo_epi16(r2, r3);
+ b3 = _mm_unpackhi_epi16(r2, r3);
+ b4 = _mm_unpacklo_epi16(r4, r5);
+ b5 = _mm_unpackhi_epi16(r4, r5);
+ b6 = _mm_unpacklo_epi16(r6, r7);
+ b7 = _mm_unpackhi_epi16(r6, r7);
+
+ p0 = _mm_unpacklo_epi64(b0, b4);
+ p1 = _mm_unpackhi_epi64(b0, b4);
+ p2 = _mm_unpacklo_epi64(b1, b5);
+ p3 = _mm_unpackhi_epi64(b1, b5);
+ p4 = _mm_unpacklo_epi64(b2, b6);
+ p5 = _mm_unpackhi_epi64(b2, b6);
+ p6 = _mm_unpacklo_epi64(b3, b7);
+ p7 = _mm_unpackhi_epi64(b3, b7);
+
+
+ /* Vertical: the same 1-D butterfly applied down the columns */
+
+ a0 = _mm_add_epi16(p0, p4); // p0 + p4
+ a1 = _mm_sub_epi16(p0, p4); // p0 - p4
+ r0 = _mm_srai_epi16(p2, 1); // p2 >> 1
+ a2 = _mm_sub_epi16(p6, r0); // p6 - (p2 >> 1)
+ r0 = _mm_srai_epi16(p6, 1); // p6 >> 1
+ a3 = _mm_add_epi16(p2, r0); //p2 + (p6 >> 1)
+
+ b0 = _mm_add_epi16(a0, a3); // a0 + a3;
+ b2 = _mm_sub_epi16(a1, a2); // a1 - a2;
+ b4 = _mm_add_epi16(a1, a2); // a1 + a2;
+ b6 = _mm_sub_epi16(a0, a3); // a0 - a3;
+
+ //-p3 + p5 - p7 - (p7 >> 1);
+ r0 = _mm_srai_epi16(p7, 1); // p7 >> 1
+ a0 = _mm_sub_epi16(p5, p3); // p5 - p3
+ a0 = _mm_sub_epi16(a0, p7); // (-p3 + p5) - p7
+ a0 = _mm_sub_epi16(a0, r0); // (-p3 + p5 - p7) - (p7 >> 1)
+
+ //p1 + p7 - p3 - (p3 >> 1);
+ r0 = _mm_srai_epi16(p3, 1); // (p3 >> 1)
+ a1 = _mm_add_epi16(p1, p7); // p1 + p7
+ a1 = _mm_sub_epi16(a1, p3); // (p1 + p7) - p3
+ a1 = _mm_sub_epi16(a1, r0); // (p1 + p7 - p3) - (p3>>1)
+
+ // -p1 + p7 + p5 + (p5 >> 1);
+ r0 = _mm_srai_epi16(p5, 1); // (p5 >> 1)
+ a2 = _mm_sub_epi16(p7, p1); // p7 - p1
+ a2 = _mm_add_epi16(a2, p5); // -p1 + p7 + p5
+ a2 = _mm_add_epi16(a2, r0); // (-p1 + p7 + p5) + (p5 >> 1)
+
+ // p3 + p5 + p1 + (p1 >> 1);
+ r0 = _mm_srai_epi16(p1, 1); // p1 >> 1
+ a3 = _mm_add_epi16(p3, p5); // p3+p5
+ a3 = _mm_add_epi16(a3, p1); // p3 + p5 + p1
+ a3 = _mm_add_epi16(a3, r0); //p3 + p5 + p1 + (p1 >> 1)
+
+ r0 = _mm_srai_epi16(a3, 2); // a3>>2
+ b1 = _mm_add_epi16(a0, r0); //a0 + (a3>>2);
+ r0 = _mm_srai_epi16(a2, 2); // a2>>2
+ b3 = _mm_add_epi16(a1, r0); // a1 + (a2>>2);
+ a1 = _mm_srai_epi16(a1, 2); // all done with a1, so this is safe
+ b5 = _mm_sub_epi16(a2, a1); //a2 - (a1>>2);
+ a0 = _mm_srai_epi16(a0, 2); // all done with a0, so this is safe
+ b7 = _mm_sub_epi16(a3, a0); //a3 - (a0>>2);
+
+ r0 = _mm_add_epi16(b0, b7); // b0 + b7;
+ r1 = _mm_sub_epi16(b2, b5); // b2 - b5;
+ r2 = _mm_add_epi16(b4, b3); // b4 + b3;
+ r3 = _mm_add_epi16(b6, b1); // b6 + b1;
+ r4 = _mm_sub_epi16(b6, b1); // b6 - b1;
+ r5 = _mm_sub_epi16(b4, b3); // b4 - b3;
+ r6 = _mm_add_epi16(b2, b5); // b2 + b5;
+ r7 = _mm_sub_epi16(b0, b7); // b0 - b7;
+
+ // r0..r7 now hold output rows 0..7 of the residual; the rows are reconstructed two at a time below
+
+ // add in prediction values
+ pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[0][pos_x]));
+ pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[1][pos_x]));
+ // (x + 32) >> 6
+ r0 = _mm_adds_epi16(r0, const32);
+ r0 = _mm_srai_epi16(r0, 6);
+ r1 = _mm_adds_epi16(r1, const32);
+ r1 = _mm_srai_epi16(r1, 6);
+ pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+ pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+ pred0 = _mm_adds_epi16(pred0, r0);
+ pred1 = _mm_adds_epi16(pred1, r1);
+
+ pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+
+ // store
+ _mm_storel_epi64((__m128i *)(&mb_rec[0][pos_x]), pred0);
+ // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+ pred0 = _mm_srli_si128(pred0, 8); // move the second row's 8 bytes into the low half
+ _mm_storel_epi64((__m128i *)(&mb_rec[1][pos_x]), pred0);
+
+ /* --- */
+
+ pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[2][pos_x]));
+ pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[3][pos_x]));
+ // (x + 32) >> 6
+ r2 = _mm_adds_epi16(r2, const32);
+ r2 = _mm_srai_epi16(r2, 6);
+ r3 = _mm_adds_epi16(r3, const32);
+ r3 = _mm_srai_epi16(r3, 6);
+ pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+ pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+ pred0 = _mm_adds_epi16(pred0, r2);
+ pred1 = _mm_adds_epi16(pred1, r3);
+
+ pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+
+ // store
+ _mm_storel_epi64((__m128i *)(&mb_rec[2][pos_x]), pred0);
+ // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+ pred0 = _mm_srli_si128(pred0, 8);
+ _mm_storel_epi64((__m128i *)(&mb_rec[3][pos_x]), pred0);
+
+ /* --- */
+
+ pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[4][pos_x]));
+ pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[5][pos_x]));
+ // (x + 32) >> 6
+ r4 = _mm_adds_epi16(r4, const32);
+ r4 = _mm_srai_epi16(r4, 6);
+ r5 = _mm_adds_epi16(r5, const32);
+ r5 = _mm_srai_epi16(r5, 6);
+ pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+ pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+ pred0 = _mm_adds_epi16(pred0, r4);
+ pred1 = _mm_adds_epi16(pred1, r5);
+
+ pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+
+ // store
+ _mm_storel_epi64((__m128i *)(&mb_rec[4][pos_x]), pred0);
+ // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+ pred0 = _mm_srli_si128(pred0, 8);
+ _mm_storel_epi64((__m128i *)(&mb_rec[5][pos_x]), pred0);
+
+ /* --- */
+
+ pred0 = _mm_loadl_epi64((__m128i *)(&mb_pred[6][pos_x]));
+ pred1 = _mm_loadl_epi64((__m128i *)(&mb_pred[7][pos_x]));
+ // (x + 32) >> 6
+ r6 = _mm_adds_epi16(r6, const32);
+ r6 = _mm_srai_epi16(r6, 6);
+ r7 = _mm_adds_epi16(r7, const32);
+ r7 = _mm_srai_epi16(r7, 6);
+ pred0 = _mm_unpacklo_epi8(pred0, zero); // convert to short
+ pred1 = _mm_unpacklo_epi8(pred1, zero); // convert to short
+ pred0 = _mm_adds_epi16(pred0, r6);
+ pred1 = _mm_adds_epi16(pred1, r7);
+
+ pred0 = _mm_packus_epi16(pred0, pred1); // convert to unsigned char
+
+ // store
+ _mm_storel_epi64((__m128i *)&mb_rec[6][pos_x], pred0);
+ // TODO: if mb_pred was converted to 4 8x8 blocks, we could store more easily.
+ pred0 = _mm_srli_si128(pred0, 8);
+ _mm_storel_epi64((__m128i *)&mb_rec[7][pos_x], pred0);
+}
+
+#endif
+
+#ifdef _M_IX86
+// MMX reconstruction of one 8x8 block: for each of 8 rows, rec[0..7] = saturate_u8(pred[0..7] + ((rres[0..7] + 32) >> 6)), with rec/pred taken at byte offset pos_x. All three buffers advance 16 bytes per row.
+static void sample_reconstruct8x8_mmx(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *mb_rres8, int pos_x)
+{
+ __asm
+ {
+ mov esi, 8 // loop 8 times (one iteration per row)
+ mov eax, mb_rec
+ add eax, pos_x
+ mov ebx, mb_pred
+ add ebx, pos_x
+ mov ecx, mb_rres8
+
+ // mm0 : constant value 32 in each of the four 16-bit lanes
+ mov edx, 0x00200020
+ movd mm0, edx
+ punpckldq mm0, mm0
+ // mm7: zero
+ pxor mm7, mm7
+
+loop8:
+ movq mm1, MMWORD PTR 0[ecx] // residual coefficients 0..3 of this row
+ movq mm3, MMWORD PTR 8[ecx] // residual coefficients 4..7 (previously skipped, leaving columns 4..7 zeroed)
+ paddw mm1, mm0 // rres + 32
+ paddw mm3, mm0
+ psraw mm1, 6 // (rres + 32) >> 6
+ psraw mm3, 6
+ movq mm2, MMWORD PTR 0[ebx] // 8 prediction pixels
+ movq mm4, mm2
+ punpcklbw mm2, mm7 // convert pred pixels 0..3 from unsigned char to short
+ punpckhbw mm4, mm7 // convert pred pixels 4..7 from unsigned char to short
+ paddsw mm2, mm1 // pred_row + rres_row
+ paddsw mm4, mm3
+ packuswb mm2, mm4 // saturate both halves back to 8 unsigned bytes
+ movq MMWORD PTR 0[eax], mm2
+
+ add eax, 16
+ add ebx, 16
+ add ecx, 16
+ sub esi, 1
+ jne loop8
+ emms
+ }
+}
+#endif
+
+// Portable C reconstruction of one 8x8 block (called by itrans8x8_c in this file). benski> unused, left in place for unit testing and if we ever need to port the decoder to non-intel
+static void sample_reconstruct8x8(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *mb_rres8, int pos_x, int max_imgpel_value) // mb_rec[j][pos_x+i] = iClip1(max_imgpel_value, mb_pred[j][pos_x+i] + rounded residual) for 0 <= i, j < 8
+{
+ int i,j;
+ for( j = 0; j < 8; j++)
+ {
+ imgpel *rec_row = mb_rec[j] + pos_x; // output row, starting at column pos_x
+ const short *rres_row = mb_rres8[j]; // residual row; indexed from 0, not from pos_x
+ const imgpel *pred_row = mb_pred[j] + pos_x;
+
+ for( i = 0; i < 8; i++)
+ rec_row[i] = (imgpel) iClip1(max_imgpel_value, pred_row[i] + rshift_rnd_sf(rres_row[i], DQ_BITS_8)); // presumably (x + 32) >> 6 as in the SIMD paths - TODO confirm DQ_BITS_8 and rshift_rnd_sf
+ }
+}
+/*!
+***********************************************************************
+* \brief
+* Inverse 8x8 transformation and reconstruction (x86 build only): runs the scalar inverse8x8 on block, then the MMX reconstruction into mb_rec at column pos_x
+***********************************************************************
+*/
+#ifdef _M_IX86
+void itrans8x8_mmx(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x)
+{
+ inverse8x8((h264_short_8x8block_row_t *)block); // casts away const: block is overwritten in place with the transformed residual
+ sample_reconstruct8x8_mmx(mb_rec, mb_pred, block, pos_x);
+}
+#endif
+
+void itrans8x8_c(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x) // Portable inverse 8x8 transform + reconstruction of one block into mb_rec at column pos_x.
+{
+ inverse8x8((h264_short_8x8block_row_t *)block); // casts away const: block is overwritten in place with the transformed residual
+ sample_reconstruct8x8(mb_rec, mb_pred, block, pos_x, 255); // 255 is passed as max_imgpel_value, i.e. the clip limit handed to iClip1
+}
+
+void itrans8x8_lossless(h264_imgpel_macroblock_row_t *mb_rec, const h264_imgpel_macroblock_row_t *mb_pred, const h264_short_8x8block_row_t *block, int pos_x) // Lossless path: no transform, rounding or shift; the values in block are added directly to the prediction. block is only read.
+{
+ int i,j;
+
+ for( j = 0; j < 8; j++)
+ {
+ imgpel *rec_row = mb_rec[j] + pos_x; // output row, starting at column pos_x
+ const short *rres_row = block[j]; // residual row; indexed from 0, not from pos_x
+ const imgpel *pred_row = mb_pred[j] + pos_x;
+ for( i = 0; i < 8; i++)
+ rec_row[i] = (imgpel) iClip1(255, (rres_row[i] + (long)pred_row[i])); // sum passed through iClip1 with limit 255
+ }
+} \ No newline at end of file
diff --git a/Src/h264dec/ldecod/src/vlc.c b/Src/h264dec/ldecod/src/vlc.c
new file mode 100644
index 00000000..397a7d08
--- /dev/null
+++ b/Src/h264dec/ldecod/src/vlc.c
@@ -0,0 +1,1769 @@
+/*!
+************************************************************************
+* \file vlc.c
+*
+* \brief
+* VLC support functions
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Inge Lille-Langøy <inge.lille-langoy@telenor.com>
+* - Detlev Marpe <marpe@hhi.de>
+* - Gabi Blaettermann
+************************************************************************
+*/
+#include "contributors.h"
+
+#include "global.h"
+#include "vlc.h"
+#include "elements.h"
+#include "optim.h"
+#include <emmintrin.h>
+
+// A little trick to avoid those horrible #if TRACE all over the source code.
+// With TRACE enabled the macro copies s into the tracestring member of a variable
+// named `symbol`, which must be in scope at the point of use; otherwise it expands to nothing.
+// NOTE(review): strncpy() leaves the destination unterminated when s holds
+// TRACESTRING_SIZE or more characters.
+#if TRACE
+#define SYMTRACESTRING(s) strncpy(symbol.tracestring,s,TRACESTRING_SIZE)
+#else
+#define SYMTRACESTRING(s) // do nothing
+#endif
+
+static int ShowBits (const uint8_t buffer[],int totbitoffset,int bitcount, int numbits);
+
+// Note that all NA values are filled with 0
+
+/*!
+*************************************************************************************
+* \brief
+*    ue_v, reads an ue(v) syntax element. The bitstream read position
+*    (frame_bitoffset) is advanced by readSyntaxElement_VLC() on success.
+*
+* \param tracestring
+*    the string for the trace file (only used when TRACE is enabled)
+*
+* \param bitstream
+*    the stream to be read from
+*
+* \return
+*    the value of the coded syntax element, or 0 when no symbol could be read
+*
+*************************************************************************************
+*/
+int ue_v (const char *tracestring, Bitstream *bitstream)
+{
+  SyntaxElement symbol;
+
+  //assert (bitstream->streamBuffer != NULL);
+  symbol.mapping = linfo_ue;   // Mapping rule
+  // readSyntaxElement_VLC() returns before calling the mapping rule when the read
+  // fails, so give value1 a defined content instead of returning stack garbage.
+  symbol.value1 = 0;
+  SYMTRACESTRING(tracestring);
+  readSyntaxElement_VLC (&symbol, bitstream);
+  return symbol.value1;
+}
+
+
+/*!
+*************************************************************************************
+* \brief
+*    se_v, reads an se(v) syntax element. The bitstream read position
+*    (frame_bitoffset) is advanced by readSyntaxElement_VLC() on success.
+*
+* \param tracestring
+*    the string for the trace file (only used when TRACE is enabled)
+*
+* \param bitstream
+*    the stream to be read from
+*
+* \return
+*    the signed value of the coded syntax element, or 0 when no symbol could be read
+*
+*************************************************************************************
+*/
+int se_v (const char *tracestring, Bitstream *bitstream)
+{
+  SyntaxElement symbol;
+
+  //assert (bitstream->streamBuffer != NULL);
+  symbol.mapping = linfo_se;   // Mapping rule: signed integer
+  // readSyntaxElement_VLC() returns before calling the mapping rule when the read
+  // fails, so give value1 a defined content instead of returning stack garbage.
+  symbol.value1 = 0;
+  SYMTRACESTRING(tracestring);
+  readSyntaxElement_VLC (&symbol, bitstream);
+  return symbol.value1;
+}
+
+
+/*!
+*************************************************************************************
+* \brief
+*    u_v, reads an u(v) syntax element by forwarding to readSyntaxElement_FLC()
+*
+* \param LenInBits
+*    length of the syntax element
+*
+* \param tracestring
+*    the string for the trace file (not used by this function)
+*
+* \param bitstream
+*    the stream to be read from
+*
+* \return
+*    the value of the coded syntax element
+*
+*************************************************************************************
+*/
+int u_v (int LenInBits, const char*tracestring, Bitstream *bitstream)
+{
+  const int value = readSyntaxElement_FLC(bitstream, LenInBits);
+
+  return value;
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    i_v, reads an i(v) syntax element: a fixed-length field of LenInBits bits
+*    whose most significant bit is the sign bit (two's complement)
+*
+* \param LenInBits
+*    length of the syntax element
+*
+* \param tracestring
+*    the string for the trace file (not used by this function)
+*
+* \param bitstream
+*    the stream to be read from
+*
+* \return
+*    the sign-extended value of the coded syntax element
+*
+*************************************************************************************
+*/
+int i_v (int LenInBits, const char*tracestring, Bitstream *bitstream)
+{
+  int val = readSyntaxElement_FLC (bitstream, LenInBits);
+
+  // can be negative: replicate the field's top bit into all higher bits.
+  // Widths outside 1..31 are skipped because building the sign-bit mask for
+  // them would need an undefined shift (negative count or shift into the sign bit).
+  if (LenInBits > 0 && LenInBits < 32)
+  {
+    const int sign_bit = 1 << (LenInBits - 1);
+
+    if (val & sign_bit)
+      val |= -sign_bit;
+  }
+
+  return val;
+}
+
+
+/*!
+*************************************************************************************
+* \brief
+*    u_1, reads an u(1) syntax element (a single bit) through u_v()
+*
+* \param tracestring
+*    the string for the trace file
+*
+* \param bitstream
+*    the stream to be read from
+*
+* \return
+*    the value of the coded bit, converted to Boolean
+*
+*************************************************************************************
+*/
+Boolean u_1 (const char *tracestring, Bitstream *bitstream)
+{
+  const int bit = u_v (1, tracestring, bitstream);
+
+  return (Boolean) bit;
+}
+
+
+
+/*!
+************************************************************************
+* \brief
+*    mapping rule for ue(v) syntax elements
+* \par Input:
+*    length and info
+* \par Output:
+*    number in the code table: 2^(len/2) + info - 1, written to *value1
+*    (dummy is not touched)
+************************************************************************
+*/
+void linfo_ue(int len, int info, int *value1, int *dummy)
+{
+  const int prefix_bits = len >> 1;
+  const unsigned int base = (unsigned int) 1 << prefix_bits;
+
+  //assert ((len >> 1) < 32);
+  *value1 = (int) (base + (unsigned int) (info) - 1);
+}
+
+/*!
+************************************************************************
+* \brief
+*    mapping rule for se(v) syntax elements
+* \par Input:
+*    length and info
+* \par Output:
+*    signed value in *value1: code numbers 0,1,2,3,4,... map to
+*    0,+1,-1,+2,-2,... (dummy is not touched)
+************************************************************************
+*/
+void linfo_se(int len, int info, int *value1, int *dummy)
+{
+  //assert ((len >> 1) < 32);
+  const unsigned int code_num = ((unsigned int) 1 << (len >> 1)) + (unsigned int) info - 1;
+  const int magnitude = (int) ((code_num + 1) >> 1);
+
+  // odd code numbers are positive, even ones negative
+  *value1 = (code_num & 0x01) ? magnitude : -magnitude;
+}
+
+
+/*!
+************************************************************************
+* \par Input:
+*    length and info
+* \par Output:
+*    cbp (intra), looked up as NCBP[1][code number][0]
+* \note
+*    NOTE(review): the code number from linfo_ue() is not range-checked
+*    before it indexes NCBP - confirm callers only see valid codes.
+************************************************************************
+*/
+void linfo_cbp_intra_normal(int len,int info,int *cbp, int *dummy)
+{
+  int table_index;
+
+  linfo_ue(len, info, &table_index, dummy);
+  *cbp = NCBP[1][table_index][0];
+}
+
+
+/*!
+************************************************************************
+* \par Input:
+*    length and info
+* \par Output:
+*    cbp (intra), looked up as NCBP[0][code number][0]
+* \note
+*    NOTE(review): the code number from linfo_ue() is not range-checked
+*    before it indexes NCBP - confirm callers only see valid codes.
+************************************************************************
+*/
+void linfo_cbp_intra_other(int len,int info,int *cbp, int *dummy)
+{
+  int table_index;
+
+  linfo_ue(len, info, &table_index, dummy);
+  *cbp = NCBP[0][table_index][0];
+}
+
+/*!
+************************************************************************
+* \par Input:
+*    length and info
+* \par Output:
+*    cbp (inter), looked up as NCBP[1][code number][1]
+* \note
+*    NOTE(review): the code number from linfo_ue() is not range-checked
+*    before it indexes NCBP - confirm callers only see valid codes.
+************************************************************************
+*/
+void linfo_cbp_inter_normal(int len,int info,int *cbp, int *dummy)
+{
+  int table_index;
+
+  linfo_ue(len, info, &table_index, dummy);
+  *cbp = NCBP[1][table_index][1];
+}
+
+/*!
+************************************************************************
+* \par Input:
+*    length and info
+* \par Output:
+*    cbp (inter), looked up as NCBP[0][code number][1]
+* \note
+*    NOTE(review): the code number from linfo_ue() is not range-checked
+*    before it indexes NCBP - confirm callers only see valid codes.
+************************************************************************
+*/
+void linfo_cbp_inter_other(int len,int info,int *cbp, int *dummy)
+{
+  int table_index;
+
+  linfo_ue(len, info, &table_index, dummy);
+  *cbp = NCBP[0][table_index][1];
+}
+
+/*!
+************************************************************************
+* \par Input:
+*    length and info
+* \par Output:
+*    level, run
+* \note
+*    Codes of up to 9 bits are looked up in NTAB1; longer codes are
+*    computed from LEVRUN1. The lowest info bit carries the sign and a
+*    code length of 1 marks EOB (level forced to 0).
+************************************************************************
+*/
+void linfo_levrun_inter(int len, int info, int *level, int *irun)
+{
+  //assert (((len >> 1) - 5) < 32);
+  const int negative = ((info & 0x01) == 1);
+
+  if (len > 9)
+  {
+    // too long for the table: derive run and level arithmetically
+    *irun  = (info & 0x1e) >> 1;
+    *level = LEVRUN1[*irun] + (info >> 5) + ( 1 << ((len >> 1) - 5));
+  }
+  else
+  {
+    const int row = imax(0,(len >> 1)-1);
+    const int col = info >> 1;
+
+    *level = NTAB1[row][col][0];
+    *irun  = NTAB1[row][col][1];
+  }
+
+  if (negative)
+    *level = -*level;   // make sign
+
+  if (len == 1) // EOB
+    *level = 0;
+}
+
+
+/*!
+************************************************************************
+* \par Input:
+*    length and info
+* \par Output:
+*    level, run
+* \note
+*    Codes of up to 5 bits are looked up in NTAB3; longer codes are
+*    computed from LEVRUN3. The lowest info bit carries the sign and a
+*    code length of 1 marks EOB (level forced to 0).
+************************************************************************
+*/
+void linfo_levrun_c2x2(int len, int info, int *level, int *irun)
+{
+  const int negative = ((info & 0x01) == 1);
+
+  if (len > 5)
+  {
+    // too long for the table: derive run and level arithmetically
+    *irun  = (info & 0x06) >> 1;
+    *level = LEVRUN3[*irun] + (info >> 3) + (1 << ((len >> 1) - 3));
+  }
+  else
+  {
+    const int row = imax(0, (len >> 1) - 1);
+    const int col = info >> 1;
+
+    *level = NTAB3[row][col][0];
+    *irun  = NTAB3[row][col][1];
+  }
+
+  if (negative)
+    *level = -*level;   // make sign
+
+  if (len == 1) // EOB
+    *level = 0;
+}
+
+/*!
+************************************************************************
+* \brief
+*    read next UVLC codeword from UVLC-partition and
+*    map it to the corresponding syntax element
+*
+* \return
+*    1 on success (sym->len, value1 and value2 are filled in and the
+*    stream position is advanced), -1 if GetVLCSymbol() reports an error;
+*    in that case the mapping rule is not called
+************************************************************************
+*/
+int readSyntaxElement_VLC(SyntaxElement *sym, Bitstream *currStream)
+{
+  int info;
+
+  sym->len = GetVLCSymbol (currStream->streamBuffer, currStream->frame_bitoffset, &info, currStream->bitstream_length);
+  if (sym->len != -1)
+  {
+    currStream->frame_bitoffset += sym->len;
+    sym->mapping(sym->len, info, &(sym->value1), &(sym->value2));
+    return 1;
+  }
+
+  return -1;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    read next UVLC codeword from the bitstream of the given data
+*    partition; thin wrapper around readSyntaxElement_VLC()
+************************************************************************
+*/
+int readSyntaxElement_UVLC(SyntaxElement *sym, struct datapartition *dp)
+{
+  Bitstream *stream = dp->bitstream;
+
+  return readSyntaxElement_VLC(sym, stream);
+}
+
+/*!
+************************************************************************
+* \brief
+*    read next VLC codeword for 4x4 Intra Prediction Mode and
+*    map it to the corresponding Intra Prediction Direction
+*
+* \return
+*    1 on success, -1 if GetVLCSymbol_IntraMode() reports an error.
+*    sym->value1 is -1 for a one-bit codeword, otherwise the 3-bit info
+************************************************************************
+*/
+int readSyntaxElement_Intra4x4PredictionMode(SyntaxElement *sym, Bitstream *currStream)
+{
+  int info;
+
+  sym->len = GetVLCSymbol_IntraMode (currStream->streamBuffer, currStream->frame_bitoffset, &info, currStream->bitstream_length);
+  if (sym->len == -1)
+    return -1;
+
+  currStream->frame_bitoffset += sym->len;
+
+  if (sym->len == 1)
+    sym->value1 = -1;
+  else
+    sym->value1 = info;
+
+#if TRACE
+  tracebits2(sym->tracestring, sym->len, sym->value1);
+#endif
+
+  return 1;
+}
+
+// Reads the intra prediction mode codeword that starts at bit totbitoffset of buffer.
+// A leading 1 bit is a complete one-bit codeword (*info = 0, returns 1). Otherwise the
+// three bits that follow the leading 0 are returned in *info and 4 is returned.
+// Returns -1 when the leading bit is 0 and the start byte lies at or beyond bytecount.
+// NOTE(review): the leading bit is read before the bytecount test, and the 4-bit form
+// also reads the byte after the current one.
+int GetVLCSymbol_IntraMode (const uint8_t buffer[],int totbitoffset,int *info, int bytecount)
+{
+  const int byte_index = totbitoffset >> 3;            // byte from start of buffer
+  const int bit_index  = 7 - (totbitoffset & 0x07);    // bit position inside that byte (7 = MSB)
+  const uint8_t *cursor = buffer + byte_index;
+  int window;
+
+  // First bit
+  if ((*cursor >> bit_index) & 0x01)
+  {
+    *info = 0;
+    return 1;
+  }
+
+  if (byte_index >= bytecount)
+    return -1;
+
+  // 16-bit window; shift out everything up to and including the leading 0 bit
+  window = (cursor[0] << 8) + cursor[1];
+  window <<= 8 - bit_index;
+  window &= 0xFFFF;
+
+  *info = window >> 13;   // keep the top three bits
+  return 4;               // number of bits consumed
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    test if bit buffer contains only stop bit
+*
+* \param buffer
+*    buffer containing VLC-coded data bits
+* \param totbitoffset
+*    bit offset from start of partition
+* \param bytecount
+*    buffer length
+* \return
+*    TRUE while the read position is before the last byte, or when the
+*    current bit is 0 (it cannot be the stop bit). Otherwise 1 if any bit
+*    after the current one in the last byte is set, and 0 if the current
+*    bit is the final stop bit.
+************************************************************************
+*/
+int more_rbsp_data (const uint8_t buffer[],int totbitoffset,int bytecount)
+{
+  long byteoffset = (totbitoffset >> 3);      // byte from start of buffer
+  int bitoffset;
+  uint8_t last_byte;
+
+  // there is more until we're in the last byte
+  if (byteoffset < (bytecount - 1))
+    return TRUE;
+
+  bitoffset = (7 - (totbitoffset & 0x07));    // bit position inside the byte (7 = MSB)
+  last_byte = buffer[byteoffset];
+
+  //assert (byteoffset<bytecount);
+
+  // a stop bit has to be one
+  if (((last_byte >> bitoffset) & 0x01) == 0)
+    return TRUE;
+
+  // the current bit is 1: it is the stop bit only if every lower bit is 0
+  return (last_byte & ((1 << bitoffset) - 1)) != 0;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Check if there are symbols for the next MB: returns non-zero when
+*    more_rbsp_data() reports no further data in the partition that
+*    carries SE_MBTYPE
+************************************************************************
+*/
+int uvlc_startcode_follows(Slice *currSlice, int dummy)
+{
+  const byte partition = assignSE2partition[currSlice->dp_mode][SE_MBTYPE];
+  Bitstream *stream = currSlice->partArr[partition].bitstream;
+
+  return !more_rbsp_data(stream->streamBuffer, stream->frame_bitoffset, stream->bitstream_length);
+}
+
+
+
+/*!
+************************************************************************
+* \brief
+*    read one exp-golomb VLC symbol
+*
+* \param buffer
+*    containing VLC-coded data bits
+* \param totbitoffset
+*    bit offset from start of partition
+* \param info
+*    returns the value of the symbol (the bits after the leading 1)
+* \param bytecount
+*    buffer length
+* \return
+*    bits read (2 * leading zeros + 1), or -1 if the codeword does not
+*    fit into bytecount bytes
+*
+* \note
+*    The length test is applied while the leading zeros are counted, so a
+*    zero-filled tail can no longer drive the scan past the buffer. The
+*    test only ever becomes "more true" as the scan proceeds, so results
+*    are the same as testing once after the scan.
+************************************************************************
+*/
+int GetVLCSymbol (const uint8_t buffer[],int totbitoffset,int *info, int bytecount)
+{
+  long byteoffset = (totbitoffset >> 3);         // byte from start of buffer
+  int  bitoffset  = (7 - (totbitoffset & 0x07)); // bit from start of byte
+  int  bitcounter = 1;
+  int  len = 0;
+  int  inf = 0;                                  // shortest possible code is 1, then info is always 0
+  int  ctr_bit;
+  const uint8_t *cur_byte;
+
+  // a start position behind the buffer can never produce a symbol
+  if (byteoffset > bytecount)
+    return -1;
+
+  cur_byte = &(buffer[byteoffset]);
+  ctr_bit  = ((*cur_byte) >> (bitoffset)) & 0x01;  // control bit for current bit position
+
+  while (ctr_bit == 0)
+  {                 // find leading 1 bit
+    len++;
+    bitcounter++;
+    bitoffset--;
+    bitoffset &= 0x07;
+    cur_byte  += (bitoffset == 7);
+    byteoffset+= (bitoffset == 7);
+
+    // stop before reading once the codeword can no longer fit
+    if (byteoffset + ((len + 7) >> 3) > bytecount)
+      return -1;
+
+    ctr_bit = ((*cur_byte) >> (bitoffset)) & 0x01;
+  }
+
+  // make infoword
+  while (len--)
+  {
+    bitoffset --;
+    bitoffset &= 0x07;
+    cur_byte  += (bitoffset == 7);
+    bitcounter++;
+    inf <<= 1;
+    inf |= ((*cur_byte) >> (bitoffset)) & 0x01;
+  }
+
+  *info = inf;
+  return bitcounter;           // number of bits consumed by the codeword
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Returns the top numbits bits of a 16-bit window
+*
+* \param inf
+*    window value with the bit offset already applied
+* \param numbits
+*    number of bits to keep
+*
+************************************************************************
+*/
+
+static inline int ShowBitsThres16(int inf, int numbits)
+{
+  const int discarded = 16 - numbits;   // low bits that are shifted out
+
+  return inf >> discarded;
+}
+
+// Returns the top numbits bits of a 24-bit window (three bytes with the bit offset
+// already applied). No length check is made here; the caller is expected to have
+// masked inf to 24 bits.
+static inline int ShowBitsThres(int inf, int numbits)
+{
+  const int discarded = 24 - numbits;   // low bits that are shifted out
+
+  return inf >> discarded;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    code from bitstream (2d tables): linear search of a
+*    tabheight x tabwidth table of code lengths / code words for the
+*    entry that matches the upcoming bits
+*
+* \return
+*    0 when a code was found (sym->len, sym->value1 = column,
+*    sym->value2 = row and *code are set and the stream is advanced),
+*    -1 otherwise
+************************************************************************
+*/
+
+static int code_from_bitstream_2d(SyntaxElement *sym,
+                                  Bitstream *currStream,
+                                  const uint8_t *lentab,
+                                  const uint8_t *codtab,
+                                  int tabwidth,
+                                  int tabheight,
+                                  int *code)
+{
+  int row, col;
+  const uint8_t *src = &currStream->streamBuffer[currStream->frame_bitoffset >> 3];
+
+  // Three bytes are the most ShowBitsThres may have to look at. Original author's note:
+  // even at the end of a stream this stays inside allocated memory because the buffer
+  // is allocated with MAX_CODED_FRAME_SIZE.
+  unsigned int window = (src[0] << 16) + (src[1] << 8) + src[2];
+
+  window <<= (currStream->frame_bitoffset & 0x07);  // the offset is constant, apply it once
+  window &= 0xFFFFFF;                               // keep 24 bits so the later shift cannot pull in stray high bits
+
+  // this VLC decoding method is not optimized for speed
+  for (row = 0; row < tabheight; row++)
+  {
+    for (col = 0; col < tabwidth; col++)
+    {
+      const int k = row * tabwidth + col;
+      const uint8_t code_len = lentab[k];
+
+      // a length of 0 marks an unused table entry
+      if (code_len != 0 && ShowBitsThres(window, code_len) == codtab[k])
+      {
+        sym->len = code_len;
+        currStream->frame_bitoffset += code_len;  // move bitstream pointer
+        *code = codtab[k];
+        sym->value1 = col;
+        sym->value2 = row;
+        return 0;                                 // found code and return
+      }
+    }
+  }
+
+  return -1;  // failed to find code
+}
+
+// Linear search of a single-row table of at most 16 entries (the search stops at the
+// first entry whose length is 0). Returns the index of the matching entry and advances
+// the stream by its length, or returns -1 when nothing matches.
+static int code_from_bitstream_2d_16_1(Bitstream *currStream,
+                                       const uint8_t *lentab,
+                                       const uint8_t *codtab)
+{
+  int idx = 0;
+  const uint8_t *src = &currStream->streamBuffer[currStream->frame_bitoffset >> 3];
+
+  // Three bytes are the most ShowBitsThres may have to look at. Original author's note:
+  // even at the end of a stream this stays inside allocated memory because the buffer
+  // is allocated with MAX_CODED_FRAME_SIZE.
+  unsigned int window = (src[0] << 16) + (src[1] << 8) + src[2];
+
+  window <<= (currStream->frame_bitoffset & 0x07);  // the offset is constant, apply it once
+  window &= 0xFFFFFF;                               // keep 24 bits
+
+  // this VLC decoding method is not optimized for speed
+  while (idx < 16 && lentab[idx])
+  {
+    if (ShowBitsThres(window, lentab[idx]) == codtab[idx])
+    {
+      currStream->frame_bitoffset += lentab[idx];   // move bitstream pointer
+      return idx;                                   // found code and return
+    }
+    idx++;
+  }
+
+  return -1;  // failed to find code
+}
+
+// SSE2 search of a 16-entry table of left-aligned 16-bit codes. The next 16 stream bits
+// are broadcast to eight lanes, ANDed with the per-entry masks and compared against the
+// code words, eight entries at a time. Returns the index of the first matching entry and
+// advances the stream by lentab[index], or returns -1.
+// codtab and masktab are read with aligned loads (_mm_load_si128).
+// NOTE(review): four bytes are read from the stream through an unsigned long pointer.
+int code_from_bitstream_2d_16_1_sse2(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab)
+{
+  const int bit_pos = currStream->frame_bitoffset;
+  const uint8_t *src = &currStream->streamBuffer[bit_pos >> 3];
+  unsigned int window = _byteswap_ulong(*(unsigned long *)src);
+  unsigned long first_bit;
+  uint16_t inf;
+  __m128i needle, hits;
+  int half, match;
+
+  // keep the 16 bits that start at the current bit position
+  window >>= 16-(bit_pos & 0x07);
+  window &= 0xFFFF;
+  inf = (uint16_t)window;
+
+  needle = _mm_set1_epi16(inf);
+
+  for (half = 0; half < 16; half += 8)
+  {
+    hits = _mm_and_si128(_mm_load_si128((__m128i *)(masktab + half)), needle);    // mask & inf
+    hits = _mm_cmpeq_epi16(hits, _mm_load_si128((__m128i *)(codtab + half)));     // == cod
+    match = _mm_movemask_epi8(hits);
+    if (match)
+    {
+      _BitScanForward(&first_bit, match);
+      first_bit >>= 1;   // two mask bits per 16-bit lane
+
+      currStream->frame_bitoffset += lentab[first_bit + half];  // move bitstream pointer
+      return first_bit + half;                                  // found code and return
+    }
+  }
+
+  return -1;
+}
+
+// Scalar search of a 16-entry table of left-aligned 16-bit codes: entry i matches when
+// (next 16 stream bits & masktab[i]) == codtab[i]. Returns the index of the first match
+// and advances the stream by lentab[i], or returns -1.
+// NOTE(review): four bytes are read from the stream through an unsigned long pointer.
+int code_from_bitstream_2d_16_1_c(Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab)
+{
+  const int bit_pos = currStream->frame_bitoffset;
+  const uint8_t *src = &currStream->streamBuffer[bit_pos >> 3];
+  unsigned int window = _byteswap_ulong(*(unsigned long *)src);
+  uint16_t inf;
+  int idx = 0;
+
+  // keep the 16 bits that start at the current bit position
+  window >>= 16-(bit_pos & 0x07);
+  window &= 0xFFFF;
+  inf = (uint16_t)window;
+
+  // this VLC decoding method is not optimized for speed
+  while (idx < 16)
+  {
+    if ((inf & masktab[idx]) == codtab[idx])
+    {
+      currStream->frame_bitoffset += lentab[idx];  // move bitstream pointer
+      return idx;                                  // found code and return
+    }
+    idx++;
+  }
+
+  return -1;  // failed to find code
+}
+
+// Helper for code_from_bitstream_2d_17_4_sse2(): compares the broadcast stream bits
+// against the eight table entries that start at flat index `offset`. On a match it
+// advances the stream by the entry's length, stores value1_base + lane in sym->value1
+// and value2 in sym->value2, and returns 1; otherwise it returns 0.
+static int match8_17_4_sse2(SyntaxElement *sym, Bitstream *currStream, const __m128i *xmm_inf,
+                            const uint16_t *len, const uint16_t *cod, const uint16_t *mask,
+                            int offset, int value1_base, int value2)
+{
+  __m128i xmm_cod  = _mm_loadu_si128((const __m128i *)(cod + offset));
+  __m128i xmm_mask = _mm_loadu_si128((const __m128i *)(mask + offset));
+  unsigned long lane;
+  int match;
+
+  xmm_mask = _mm_and_si128(xmm_mask, *xmm_inf);   // mask = mask & inf
+  xmm_mask = _mm_cmpeq_epi16(xmm_mask, xmm_cod);  // mask == cod
+  match = _mm_movemask_epi8(xmm_mask);
+  if (!match)
+    return 0;
+
+  _BitScanForward(&lane, match);
+  lane >>= 1;   // two mask bits per 16-bit lane
+
+  currStream->frame_bitoffset += len[lane + offset];  // move bitstream pointer
+  sym->value1 = value1_base + lane;
+  sym->value2 = value2;
+  return 1;
+}
+
+// SSE2 search of a 4 x 17 table (flat layout, 17 entries per row) of left-aligned
+// 16-bit codes. Entry (row j, column i) matches when
+// (next 16 stream bits & masktab[j*17+i]) == codtab[j*17+i]; on a match the stream is
+// advanced by the entry's length, sym->value1 is set to j+i and sym->value2 to j.
+// Returns 0 when a code was found, -1 otherwise. sym->len is not written.
+// The groups are probed in the same order as before the helper was factored out.
+// NOTE(review): four bytes are read from the stream through an unsigned long pointer.
+int code_from_bitstream_2d_17_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab)
+{
+  const int frame_bitoffset = currStream->frame_bitoffset;
+  const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3];
+  unsigned int window = _byteswap_ulong(*(unsigned long *)buf);
+  uint16_t inf;
+  __m128i xmm_inf;
+
+  // keep the 16 bits that start at the current bit position
+  window >>= 16-(frame_bitoffset & 0x07);
+  window &= 0xFFFF;
+  inf = (uint16_t)window;
+
+  xmm_inf = _mm_set1_epi16(inf);
+
+  /* first table - columns 0-7 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 0, 0, 0))
+    return 0;
+
+  /* second table - columns 0-7 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 17, 1, 1))
+    return 0;
+
+  /* first table - columns 8-15 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 8, 8, 0))
+    return 0;
+
+  /* extra one just for first table (column 16) */
+  if ((inf & masktab[16]) == codtab[16])
+  {
+    currStream->frame_bitoffset += lentab[16];  // move bitstream pointer
+    sym->value1 = 16;
+    sym->value2 = 0;
+    return 0;                                   // found code and return
+  }
+
+  /* second table - columns 8-15 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 25, 9, 1))
+    return 0;
+
+  /* third table - columns 0-7 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 34, 2, 2))
+    return 0;
+
+  /* third table - columns 8-15 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 42, 10, 2))
+    return 0;
+
+  /* fourth table - columns 0-7 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 51, 3, 3))
+    return 0;
+
+  /* fourth table - columns 8-15 */
+  if (match8_17_4_sse2(sym, currStream, &xmm_inf, lentab, codtab, masktab, 59, 11, 3))
+    return 0;
+
+  return -1;  // failed to find code
+}
+
+
+// Scalar search of a 4 x 17 table (flat layout, 17 entries per row) of left-aligned
+// 16-bit codes, row by row. On a match at (row j, column i) the stream is advanced by
+// the entry's length, sym->value1 is set to j+i and sym->value2 to j, and 0 is returned.
+// Returns -1 when nothing matches. sym->len is not written.
+// NOTE(review): four bytes are read from the stream through an unsigned long pointer.
+int code_from_bitstream_2d_17_4_c(SyntaxElement *sym, Bitstream *currStream, const uint16_t *lentab, const uint16_t *codtab, const uint16_t *masktab)
+{
+  const int bit_pos = currStream->frame_bitoffset;
+  const uint8_t *src = &currStream->streamBuffer[bit_pos >> 3];
+  unsigned int window = _byteswap_ulong(*(unsigned long *)src);
+  uint16_t inf;
+  int row, col;
+
+  // keep the 16 bits that start at the current bit position
+  window >>= 16-(bit_pos & 0x07);
+  window &= 0xFFFF;
+  inf = (uint16_t)window;
+
+  // this VLC decoding method is not optimized for speed
+  for (row = 0; row < 4; row++)
+  {
+    const int base = row * 17;
+
+    for (col = 0; col < 17; col++)
+    {
+      if ((inf & masktab[base + col]) != codtab[base + col])
+        continue;
+
+      currStream->frame_bitoffset += lentab[base + col];  // move bitstream pointer
+      sym->value1 = row + col;
+      sym->value2 = row;
+      return 0;                                           // found code and return
+    }
+  }
+
+  return -1;  // failed to find code
+}
+
+// Scalar search of a 4 x 9 table (flat layout, 9 entries per row) of left-aligned
+// 16-bit codes, row by row. On a match at (row j, column i) sym->len receives the code
+// length, the stream is advanced by it, sym->value1 is set to j+i and sym->value2 to j,
+// and 0 is returned. Returns -1 when nothing matches.
+// NOTE(review): four bytes are read from the stream through an unsigned long pointer.
+static int code_from_bitstream_2d_9_4(SyntaxElement *sym,
+                                      Bitstream *currStream,
+                                      const uint16_t *lentab,
+                                      const uint16_t *codtab,
+                                      const uint16_t *masktab)
+{
+  const int bit_pos = currStream->frame_bitoffset;
+  const uint8_t *src = &currStream->streamBuffer[bit_pos >> 3];
+  unsigned int window = _byteswap_ulong(*(unsigned long *)src);
+  uint16_t inf;
+  int row, col;
+
+  // keep the 16 bits that start at the current bit position
+  window >>= 16-(bit_pos & 0x07);
+  window &= 0xFFFF;
+  inf = (uint16_t)window;
+
+  // this VLC decoding method is not optimized for speed
+  for (row = 0; row < 4; row++)
+  {
+    const int base = row * 9;
+
+    for (col = 0; col < 9; col++)
+    {
+      if ((inf & masktab[base + col]) != codtab[base + col])
+        continue;
+
+      sym->len = lentab[base + col];
+      currStream->frame_bitoffset += lentab[base + col];  // move bitstream pointer
+      sym->value1 = row + col;
+      sym->value2 = row;
+      return 0;                                           // found code and return
+    }
+  }
+
+  return -1;  // failed to find code
+}
+
+// Scalar search of a packed 16-entry table of left-aligned 8-bit codes: entry i matches
+// when (next 8 stream bits & masktab[i]) == codtab[i]. On a match the stream is advanced
+// by lentab[i] and the flat index is unpacked into
+//   sym->value2 = (2*i) / 9   and   sym->value1 = sym->value2 + ((2*i) % 9) / 2.
+// Returns 0 when a code was found, -1 otherwise. sym->len is not written.
+// NOTE(review): two bytes are read from the stream through an unsigned short pointer.
+int code_from_bitstream_2d_5_4_c(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab)
+{
+  const int bit_pos = currStream->frame_bitoffset;
+  const uint8_t *src = &currStream->streamBuffer[bit_pos >> 3];
+  unsigned int window = _byteswap_ushort(*(unsigned short *)src);
+  uint8_t inf;
+  int idx;
+
+  // keep the 8 bits that start at the current bit position
+  window >>= 8-(bit_pos & 0x07);
+  window &= 0xFF;
+  inf = (uint8_t)window;
+
+  for (idx = 0; idx < 16; idx++)
+  {
+    if ((inf & masktab[idx]) != codtab[idx])
+      continue;
+
+    currStream->frame_bitoffset += lentab[idx];  // move bitstream pointer
+    sym->value2 = (idx<<1)/9;
+    sym->value1 = sym->value2 + (((idx<<1)%9)>>1);
+    return 0;                                    // found code and return
+  }
+
+  return -1;  // failed to find code
+}
+
+
+// SSE2 search of a packed 16-entry table of left-aligned 8-bit codes. The next 8 stream
+// bits are broadcast to sixteen byte lanes, ANDed with masktab and compared against
+// codtab in one step. On a match at flat index i the stream is advanced by lentab[i] and
+//   sym->value2 = (2*i) / 9   and   sym->value1 = sym->value2 + ((2*i) % 9) / 2.
+// Returns 0 when a code was found, -1 otherwise. sym->len is not written.
+// codtab and masktab are read with aligned loads (_mm_load_si128).
+// NOTE(review): two bytes are read from the stream through an unsigned short pointer.
+int code_from_bitstream_2d_5_4_sse2(SyntaxElement *sym, Bitstream *currStream, const uint8_t *lentab, const uint8_t *codtab, const uint8_t *masktab)
+{
+  int frame_bitoffset = currStream->frame_bitoffset;
+  const uint8_t *buf = &currStream->streamBuffer[frame_bitoffset >> 3];
+  __m128i xmm_inf, xmm_mask, xmm_cod;
+  int match;
+  unsigned int _inf = _byteswap_ushort(*(unsigned short *)buf);
+
+  // keep the 8 bits that start at the current bit position
+  _inf >>= 8-(frame_bitoffset & 0x07);
+  _inf &= 0xFF;
+
+  xmm_inf = _mm_set1_epi8(_inf);
+
+  xmm_cod = _mm_load_si128((__m128i *)codtab);
+  xmm_mask = _mm_load_si128((__m128i *)masktab);
+  xmm_mask = _mm_and_si128(xmm_mask, xmm_inf);   // mask = mask & inf
+  xmm_mask = _mm_cmpeq_epi8(xmm_mask, xmm_cod);  // mask == cod
+  match = _mm_movemask_epi8(xmm_mask);
+  if (match)
+  {
+    unsigned long result;
+    _BitScanForward(&result, match);             // one mask bit per byte lane
+
+    currStream->frame_bitoffset += lentab[result];  // move bitstream pointer
+    sym->value2 = (result<<1)/9;
+    sym->value1 = sym->value2 + (((result<<1)%9)>>1);
+    return 0;                                    // found code and return
+  }
+  return -1;
+}
+
+/*!
+************************************************************************
+* \brief
+*    read FLC codeword from UVLC-partition
+*
+* \param currStream
+*    stream to read from; frame_bitoffset is advanced by numbits
+* \param numbits
+*    number of bits to read
+* \return
+*    the numbits bits that start at the current position, as an
+*    unsigned value; 0 (without moving the position) if numbits <= 0
+*
+* \note
+*    Only three bytes are gathered from the stream, so the bit offset
+*    inside the first byte plus numbits must not exceed 24.
+************************************************************************
+*/
+int readSyntaxElement_FLC(Bitstream *currStream, int numbits)
+{
+  const int totbitoffset = currStream->frame_bitoffset;
+  const int bitoffset    = (totbitoffset & 0x07);   // bits already consumed in the first byte
+  const int byteoffset   = (totbitoffset >> 3);     // byte from start of buffer
+  const uint8_t *ptr     = &(currStream->streamBuffer[byteoffset]);
+  uint32_t tmp;
+
+  // a shift by 32 - 0 = 32 bits would be undefined; an empty field has the value 0
+  if (numbits <= 0)
+    return 0;
+
+  // widen before shifting so that a byte >= 0x80 is not shifted into the sign bit of an int
+  tmp = ((uint32_t)ptr[0] << 24) | ((uint32_t)ptr[1] << 16) | ((uint32_t)ptr[2] << 8);// | (ptr[3]);
+  tmp <<= bitoffset;       // drop the bits that were already read
+  tmp >>= 32 - numbits;    // right-align the requested bits
+  currStream->frame_bitoffset += numbits;
+  return (int)tmp;
+}
+
+
+
+/*!
+************************************************************************
+* \brief
+* read NumCoeff/TrailingOnes codeword from UVLC-partition
+************************************************************************
+*/
+
+int readSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *sym,
+ Bitstream *currStream,
+ int vlcnum)
+{
+ int frame_bitoffset = currStream->frame_bitoffset;
+ int BitstreamLengthInBytes = currStream->bitstream_length;
+ int BitstreamLengthInBits = (BitstreamLengthInBytes << 3) + 7;
+ const uint8_t *buf = currStream->streamBuffer;
+
+ static const uint16_t lentab[3][4][17] =
+ {
+ { // 0702
+ { 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16},
+ { 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16, 0},
+ { 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16, 0, 0},
+ { 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16, 0, 0, 0},
+ },
+ {
+ { 2, 6, 6, 7, 8, 8, 9,11,11,12,12,12,13,13,13,14,14},
+ { 2, 5, 6, 6, 7, 8, 9,11,11,12,12,13,13,14,14,14, 0},
+ { 3, 6, 6, 7, 8, 9,11,11,12,12,13,13,13,14,14, 0, 0},
+ { 4, 4, 5, 6, 6, 7, 9,11,11,12,13,13,13,14, 0, 0, 0},
+ },
+ {
+ { 4, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,10},
+ { 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9,10,10,10, 0},
+ { 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,10, 0, 0},
+ { 4, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9,10,10,10, 0, 0, 0},
+ },
+ };
+#if 0 // save for reference
+ static const uint32_t codtab[3][4][17] =
+ {
+ {
+ { 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7,4},
+ { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10,6},
+ { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9,5},
+ { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12,8},
+ },
+ {
+ { 3,11, 7, 7, 7, 4, 7,15,11,15,11, 8,15,11, 7, 9,7},
+ { 0, 2, 7,10, 6, 6, 6, 6,14,10,14,10,14,10,11, 8,6},
+ { 0, 0, 3, 9, 5, 5, 5, 5,13, 9,13, 9,13, 9, 6,10,5},
+ { 0, 0, 0, 5, 4, 6, 8, 4, 4, 4,12, 8,12,12, 8, 1,4},
+ },
+ {
+ {15,15,11, 8,15,11, 9, 8,15,11,15,11, 8,13, 9, 5,1},
+ { 0,14,15,12,10, 8,14,10,14,14,10,14,10, 7,12, 8,4},
+ { 0, 0,13,14,11, 9,13, 9,13,10,13, 9,13, 9,11, 7,3},
+ { 0, 0, 0,12,11,10, 9, 8,13,12,12,12, 8,12,10, 6,2},
+ },
+ };
+#endif
+ static const uint16_t codtab[3][4][17] =
+ {
+ {
+ { 0x8000, 0x1400, 0x0700, 0x0380, 0x01C0, 0x00E0, 0x0078, 0x0058, 0x0040, 0x003C, 0x002C, 0x001E, 0x0016, 0x000F, 0x000B, 0x0007, 0x0004 },
+ { 0x4000, 0x1000, 0x0600, 0x0300, 0x0180, 0x00C0, 0x0070, 0x0050, 0x0038, 0x0028, 0x001C, 0x0014, 0x0002, 0x000E, 0x000A, 0x0006, 0xFFFF },
+ { 0x2000, 0x0A00, 0x0500, 0x0280, 0x0140, 0x00A0, 0x0068, 0x0048, 0x0034, 0x0024, 0x001A, 0x0012, 0x000D, 0x0009, 0x0005, 0xFFFF, 0xFFFF },
+ { 0x1800, 0x0C00, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0060, 0x0030, 0x0020, 0x0018, 0x0010, 0x000C, 0x0008, 0xFFFF, 0xFFFF, 0xFFFF }
+ },
+ {
+ { 0xC000, 0x2C00, 0x1C00, 0x0E00, 0x0700, 0x0400, 0x0380, 0x01E0, 0x0160, 0x00F0, 0x00B0, 0x0080, 0x0078, 0x0058, 0x0038, 0x0024, 0x001C },
+ { 0x8000, 0x3800, 0x2800, 0x1800, 0x0C00, 0x0600, 0x0300, 0x01C0, 0x0140, 0x00E0, 0x00A0, 0x0070, 0x0050, 0x002C, 0x0020, 0x0018, 0xFFFF },
+ { 0x6000, 0x2400, 0x1400, 0x0A00, 0x0500, 0x0280, 0x01A0, 0x0120, 0x00D0, 0x0090, 0x0068, 0x0048, 0x0030, 0x0028, 0x0014, 0xFFFF, 0xFFFF },
+ { 0x5000, 0x4000, 0x3000, 0x2000, 0x1000, 0x0800, 0x0200, 0x0180, 0x0100, 0x00C0, 0x0060, 0x0040, 0x0008, 0x0010, 0xFFFF, 0xFFFF, 0xFFFF }
+ },
+ {
+ { 0xF000, 0x3C00, 0x2C00, 0x2000, 0x1E00, 0x1600, 0x1200, 0x1000, 0x0F00, 0x0B00, 0x0780, 0x0580, 0x0400, 0x0340, 0x0240, 0x0140, 0x0040 },
+ { 0xE000, 0x7800, 0x6000, 0x5000, 0x4000, 0x3800, 0x2800, 0x1C00, 0x0E00, 0x0A00, 0x0700, 0x0500, 0x0380, 0x0300, 0x0200, 0x0100, 0xFFFF },
+ { 0xD000, 0x7000, 0x5800, 0x4800, 0x3400, 0x2400, 0x1A00, 0x1400, 0x0D00, 0x0900, 0x0680, 0x0480, 0x02C0, 0x01C0, 0x00C0, 0xFFFF, 0xFFFF },
+ { 0xC000, 0xB000, 0xA000, 0x9000, 0x8000, 0x6800, 0x3000, 0x1800, 0x0C00, 0x0800, 0x0600, 0x0280, 0x0180, 0x0080, 0xFFFF, 0xFFFF, 0xFFFF }
+ }
+ };
+
+ static const uint16_t masktab[3][4][17] =
+ {
+ {
+ { 0x8000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF },
+ { 0xC000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000 },
+ { 0xE000, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000 },
+ { 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000 }
+ },
+ {
+ { 0xC000, 0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC },
+ { 0xC000, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFC, 0x0000 },
+ { 0xE000, 0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0x0000, 0x0000 },
+ { 0xF000, 0xF000, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFF80, 0xFFE0, 0xFFE0, 0xFFF0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0x0000, 0x0000, 0x0000 }
+ },
+ {
+ { 0xF000, 0xFC00, 0xFC00, 0xFC00, 0xFE00, 0xFE00, 0xFE00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0xFFC0 },
+ { 0xF000, 0xF800, 0xF800, 0xF800, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0x0000 },
+ { 0xF000, 0xF800, 0xF800, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0x0000, 0x0000 },
+ { 0xF000, 0xF000, 0xF000, 0xF000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFFC0, 0xFFC0, 0xFFC0, 0x0000, 0x0000, 0x0000 }
+ }
+ };
+
+ int code;
+ // vlcnum is the index of Table used to code coeff_token
+ // vlcnum==3 means (8<=nC) which uses 6bit FLC
+
+ if (vlcnum == 3)
+ {
+ // read 6 bit FLC
+ //code = ShowBits(buf, frame_bitoffset, BitstreamLengthInBytes, 6);
+ code = ShowBits(buf, frame_bitoffset, BitstreamLengthInBits, 6);
+ currStream->frame_bitoffset += 6;
+ sym->value2 = (code & 3);
+ sym->value1 = (code >> 2);
+
+ if (!sym->value1 && sym->value2 == 3)
+ {
+ // #c = 0, #t1 = 3 => #c = 0
+ sym->value2 = 0;
+ }
+ else
+ sym->value1++;
+ }
+ else
+ {
+ //retval = code_from_bitstream_2d(sym, currStream, &lentab[vlcnum][0][0], &codtab[vlcnum][0][0], 17, 4, &code);
+ code = opt_code_from_bitstream_2d_17_4(sym, currStream, lentab[vlcnum][0], codtab[vlcnum][0], masktab[vlcnum][0]);
+ }
+
+ return 0;
+}
+
+
+/*!
+************************************************************************
+* \brief
+* read NumCoeff/TrailingOnes codeword from UVLC-partition ChromaDC
+*
+* \param p_Vid
+* video parameters; active_sps->chroma_format_idc picks the table set
+* \param sym
+* syntax element passed through to the table-lookup helper
+* \param currStream
+* bitstream passed through to the table-lookup helper
+* \return
+* 0 once a helper has been called; -1 only from the default branch
+************************************************************************
+*/
+int readSyntaxElement_NumCoeffTrailingOnesChromaDC(VideoParameters *p_Vid, SyntaxElement *sym, Bitstream *currStream)
+{
+ // Disabled reference copy of the code-length table, indexed
+ // [chroma format][row][column]; kept only for comparison with the
+ // active tables below.
+#if 0
+ static const uint8_t lentab[3][4][17] =
+ {
+ //YUV420
+ {{ 2, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 3, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+ //YUV422
+ {{ 1, 7, 7, 9, 9,10,11,12,13, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 7, 7, 9,10,11,12,12, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 3, 7, 7, 9,10,11,12, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 5, 6, 7, 7,10,11, 0, 0, 0, 0, 0, 0, 0, 0}},
+ //YUV444
+ {{ 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16},
+ { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16},
+ { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16},
+ { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16}}
+ };
+#endif
+
+ //YUV420
+
+ // Active 4:2:0 tables: 8-bit entries packed into flat 16-element arrays.
+ // Each code is stored left-aligned in its byte (reference code shifted up
+ // by 8 - length) and the mask has the top `length` bits set. Slots with no
+ // codeword use length 0, code 0xFF and mask 0x00.
+ // NOTE(review): the index layout is defined by opt_code_from_bitstream_2d_5_4,
+ // which is not visible here - confirm before editing these arrays.
+ __declspec(align(32)) static const uint8_t lentab420[16] =
+ { 2, 6, 6, 6, 6, 1, 6, 7, 8, 3, 7, 8, 0, 0, 6, 7 };
+ __declspec(align(32)) static const uint8_t codtab420[16] =
+ { 0x40, 0x1C, 0x10, 0x0C, 0x08, 0x80, 0x18, 0x06, 0x03, 0x20, 0x04, 0x02, 0xFF, 0xFF, 0x14, 0x00 };
+ __declspec(align(32)) static const uint8_t masktab420[16] =
+ { 0xC0, 0xFC, 0xFC, 0xFC, 0xFC, 0x80, 0xFC, 0xFE, 0xFF, 0xE0, 0xFE, 0xFF, 0x00, 0x00, 0xFC, 0xFE };
+
+
+ // YUV422
+ // Active 4:2:2 tables: each row is the matching reference row with its
+ // leading unused entries dropped and zero-length padding appended.
+ // Codes are left-aligned in 16 bits (reference code << (16 - length)),
+ // masks have the top `length` bits set, padding is code 0xFFFF / mask 0x0000.
+ __declspec(align(32)) static const uint16_t lentab422[4][9] =
+ {
+ { 1, 7, 7, 9, 9,10,11,12,13 },
+ { 2, 7, 7, 9,10,11,12,12, 0 },
+ { 3, 7, 7, 9,10,11,12, 0, 0 },
+ { 5, 6, 7, 7,10,11, 0, 0, 0 }
+ };
+ __declspec(align(32)) static const uint16_t codtab422[4][9] =
+ {
+ { 0x8000, 0x1E00, 0x1C00, 0x0380, 0x0300, 0x01C0, 0x00E0, 0x0070, 0x0038 },
+ { 0x4000, 0x1A00, 0x1800, 0x0280, 0x0180, 0x00C0, 0x0060, 0x0050, 0xFFFF },
+ { 0x2000, 0x1600, 0x1400, 0x0200, 0x0140, 0x00A0, 0x0040, 0xFFFF, 0xFFFF },
+ { 0x0800, 0x0400, 0x1200, 0x1000, 0x0100, 0x0080, 0xFFFF, 0xFFFF, 0xFFFF }
+ };
+ __declspec(align(32)) static const uint16_t masktab422[4][9] =
+ {
+ { 0x8000, 0xFE00, 0xFE00, 0xFF80, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF0, 0xFFF8 },
+ { 0xC000, 0xFE00, 0xFE00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF0, 0xFFF0, 0x0000 },
+ { 0xE000, 0xFE00, 0xFE00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF0, 0x0000, 0x0000 },
+ { 0xF800, 0xFC00, 0xFE00, 0xFE00, 0xFFC0, 0xFFE0, 0x0000, 0x0000, 0x0000 }
+ };
+
+ // YUV444
+ // Active 4:4:4 tables, encoded the same way as the 4:2:2 tables above
+ // (17 columns per row instead of 9).
+ __declspec(align(32)) static const uint16_t lentab444[4][17] =
+ {
+ { 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16},
+ { 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16, 0},
+ { 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16, 0, 0},
+ { 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16, 0, 0, 0}
+ };
+ __declspec(align(32)) static const uint16_t codtab444[4][17] =
+ {
+ { 0x8000, 0x1400, 0x0700, 0x0380, 0x01C0, 0x00E0, 0x0078, 0x0058, 0x0040, 0x003C, 0x002C, 0x001E, 0x0016, 0x000F, 0x000B, 0x0007, 0x0004 },
+ { 0x4000, 0x1000, 0x0600, 0x0300, 0x0180, 0x00C0, 0x0070, 0x0050, 0x0038, 0x0028, 0x001C, 0x0014, 0x0002, 0x000E, 0x000A, 0x0006, 0xFFFF },
+ { 0x2000, 0x0A00, 0x0500, 0x0280, 0x0140, 0x00A0, 0x0068, 0x0048, 0x0034, 0x0024, 0x001A, 0x0012, 0x000D, 0x0009, 0x0005, 0xFFFF, 0xFFFF },
+ { 0x1800, 0x0C00, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0060, 0x0030, 0x0020, 0x0018, 0x0010, 0x000C, 0x0008, 0xFFFF, 0xFFFF, 0xFFFF }
+ };
+ __declspec(align(32)) static const uint16_t masktab444[4][17] =
+ {
+ { 0x8000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF },
+ { 0xC000, 0xFC00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000 },
+ { 0xE000, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000 },
+ { 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0xFFF8, 0xFFFC, 0xFFFC, 0xFFFE, 0xFFFE, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000 }
+ };
+
+ // Disabled reference copy of the right-aligned code values that the
+ // left-aligned codtab420/422/444 entries above were derived from.
+#if 0
+ static const uint8_t codtab[3][4][17] =
+ {
+ //YUV420
+ {{ 1, 7, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, 6, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+ //YUV422
+ {{ 1,15,14, 7, 6, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1,13,12, 5, 6, 6, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 1,11,10, 4, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 1, 1, 9, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}},
+ //YUV444
+ {{ 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7, 4},
+ { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10, 6},
+ { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9, 5},
+ { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12, 8}}
+ };
+#endif
+
+ // NOTE(review): `code` receives each helper's return value but is never
+ // read afterwards, so a lookup failure (if the helpers report one) is not
+ // propagated to the caller - confirm against the helper definitions.
+ int code;
+ // chroma_format_idc 1/2/3 maps to 0/1/2, matching the YUV420/422/444
+ // table labels above.
+ int yuv = p_Vid->active_sps->chroma_format_idc - 1;
+ switch(yuv)
+ {
+ case 0:
+ code = opt_code_from_bitstream_2d_5_4(sym, currStream, lentab420, codtab420, masktab420);
+ break;
+ case 1:
+ code = code_from_bitstream_2d_9_4(sym, currStream, lentab422[0], codtab422[0], masktab422[0]);
+ break;
+ case 2:
+ code = opt_code_from_bitstream_2d_17_4(sym, currStream, lentab444[0], codtab444[0], masktab444[0]);
+ break;
+ default:
+ // __assume(0) is the MSVC hint that this branch is unreachable, so the
+ // compiler may drop the return below.
+ // NOTE(review): chroma_format_idc == 0 would give yuv == -1 and land
+ // here - presumably callers never read chroma DC for monochrome; verify.
+ __assume(0);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*!
+************************************************************************
+* \brief
+*    read Level VLC0 codeword from UVLC-partition
+*
+* \param currStream
+*    bitstream to read from; frame_bitoffset is advanced past the
+*    complete codeword (prefix plus any suffix)
+* \return
+*    decoded signed level
+*
+* \note
+*    The prefix length is not bounded here. ShowBits() can only deliver
+*    up to 25 bits at an arbitrary bit alignment, so a prefix longer than
+*    29 bits (suffix = prefix - 4 bits) is not decoded correctly.
+*    NOTE(review): presumably only reachable with a corrupt stream - confirm.
+************************************************************************
+*/
+int readSyntaxElement_Level_VLC0(Bitstream *currStream)
+{
+  int frame_bitoffset = currStream->frame_bitoffset;
+  int BitstreamLengthInBits = (currStream->bitstream_length << 3) + 7;
+  byte *buf = currStream->streamBuffer;
+  int len = 1;   // prefix length: leading zero bits plus the terminating 1
+  int sign, level;
+
+  // Scan the unary prefix; afterwards frame_bitoffset points just past the 1 bit.
+  while (!ShowBits(buf, frame_bitoffset++, BitstreamLengthInBits, 1))
+    len++;
+
+  if (len < 15)
+  {
+    // No suffix: successive prefix lengths encode +1, -1, +2, -2, ...
+    sign  = (len - 1) & 1;
+    level = ((len - 1) >> 1) + 1;
+  }
+  else if (len == 15)
+  {
+    // Escape code: 4-bit suffix holding 3 magnitude bits followed by the sign bit.
+    int suffix = ShowBits(buf, frame_bitoffset, BitstreamLengthInBits, 4);
+    frame_bitoffset += 4;
+    sign  = suffix & 0x01;
+    level = ((suffix >> 1) & 0x07) + 8;
+  }
+  else
+  {
+    // Long escape code (len >= 16): the suffix is (len - 4) bits wide and
+    // its lowest bit is the sign.
+    int addbit     = len - 16;
+    int offset     = (2048 << addbit) - 2032;
+    int suffix_len = len - 4;
+    int suffix     = ShowBits(buf, frame_bitoffset, BitstreamLengthInBits, suffix_len);
+    frame_bitoffset += suffix_len;
+    sign  = suffix & 0x01;
+    level = (suffix >> 1) + offset;
+  }
+
+  currStream->frame_bitoffset = frame_bitoffset;
+  return sign ? -level : level;
+}
+
+/*!
+************************************************************************
+* \brief
+*    read Level VLC codeword from UVLC-partition
+*
+* \param vlc
+*    VLC table number; (vlc - 1) is the suffix length in bits and is used
+*    as a shift count, so vlc must be at least 1
+*    (readSyntaxElement_Level_VLC0 handles table 0)
+* \param currStream
+*    bitstream to read from; frame_bitoffset is advanced past the
+*    complete codeword (prefix, suffix and sign bit)
+* \return
+*    decoded signed level
+************************************************************************
+*/
+int readSyntaxElement_Level_VLCN(int vlc, Bitstream *currStream)
+{
+  int BitstreamLengthInBits = (currStream->bitstream_length << 3) + 7;
+  byte *buf = currStream->streamBuffer;
+  int bitpos = currStream->frame_bitoffset;   // read cursor, in bits
+  int shift = vlc - 1;                        // suffix length in bits
+  int len = 1;                                // prefix length incl. the terminating 1
+  int levabs, sign;
+
+  // read pre zeros; afterwards bitpos points just past the terminating 1 bit
+  while (!ShowBits(buf, bitpos++, BitstreamLengthInBits, 1))
+    len++;
+
+  if (len < 16)
+  {
+    levabs = ((len - 1) << shift) + 1;
+
+    // read (vlc-1) bits -> suffix
+    if (shift)
+    {
+      levabs += ShowBits(buf, bitpos, BitstreamLengthInBits, shift);
+      bitpos += shift;
+    }
+  }
+  else // escape
+  {
+    // The escape suffix grows with the prefix: 11 bits for len == 16,
+    // one more for every additional prefix bit.
+    int addbit = len - 5;
+    int offset = (1 << addbit) + (15 << shift) - 2047;
+
+    levabs = ShowBits(buf, bitpos, BitstreamLengthInBits, addbit) + offset;
+    bitpos += addbit;
+  }
+
+  // read 1 bit -> sign
+  sign = ShowBits(buf, bitpos, BitstreamLengthInBits, 1);
+
+  currStream->frame_bitoffset = bitpos + 1;
+  return sign ? -levabs : levabs;
+}
+
+/*!
+************************************************************************
+* \brief
+* read Total Zeros codeword from UVLC-partition
+*
+* \param currStream
+* bitstream passed through to the table-lookup helper
+* \param vlcnum
+* row index into lentab/codtab/masktab; not range-checked here
+* \return
+* whatever opt_code_from_bitstream_2d_16_1 returns for the selected row
+************************************************************************
+*/
+int readSyntaxElement_TotalZeros(Bitstream *currStream, int vlcnum)
+{
+ // Codeword length in bits for each entry of a row. Rows with fewer than
+ // 16 initializers are zero-filled by the compiler, so unused slots have
+ // length 0.
+ __declspec(align(32)) static const uint16_t lentab[TOTRUN_NUM][16] =
+ {
+
+ { 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
+ { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
+ { 4,3,3,3,4,4,3,3,4,5,5,6,5,6},
+ { 5,3,4,4,3,3,3,4,3,4,5,5,5},
+ { 4,4,4,3,3,3,3,3,4,5,4,5},
+ { 6,5,3,3,3,3,3,3,4,3,6},
+ { 6,5,3,3,3,2,3,4,3,6},
+ { 6,4,5,3,2,2,3,3,6},
+ { 6,6,4,2,2,3,2,5},
+ { 5,5,3,2,2,2,4},
+ { 4,4,3,3,1,3},
+ { 4,4,2,1,3},
+ { 3,3,1,2},
+ { 2,2,1},
+ { 1,1},
+ };
+ // The commented-out table below holds the original right-aligned code
+ // values; the active codtab stores the same codes shifted left by
+ // (16 - length).
+/*
+ static const byte codtab[TOTRUN_NUM][16] =
+ {
+ {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
+ {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
+ {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
+ {3,7,5,4,6,5,4,3,3,2,2,1,0},
+ {5,4,3,7,6,5,4,3,2,1,1,0},
+ {1,1,7,6,5,4,3,2,1,1,0},
+ {1,1,5,4,3,3,2,1,1,0},
+ {1,1,1,3,3,2,2,1,0},
+ {1,0,1,3,2,1,1,1,},
+ {1,0,1,3,2,1,1,},
+ {0,1,1,2,1,3},
+ {0,1,1,1,1},
+ {0,1,1,1},
+ {0,1,1},
+ {0,1},
+ };*/
+
+ // Codewords left-aligned in 16 bits. Slots with no codeword hold 0xFFFF,
+ // which is paired with mask 0x0000 in masktab below.
+ // NOTE(review): presumably the helper tests (bits & mask) == code, which
+ // such a slot can never satisfy - confirm in opt_code_from_bitstream_2d_16_1.
+ __declspec(align(32)) static const uint16_t codtab[TOTRUN_NUM][16] =
+ {
+{ 0x8000, 0x6000, 0x4000, 0x3000, 0x2000, 0x1800, 0x1000, 0x0C00, 0x0800, 0x0600, 0x0400, 0x0300, 0x0200, 0x0180, 0x0100, 0x0080, },
+{ 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x5000, 0x4000, 0x3000, 0x2000, 0x1800, 0x1000, 0x0C00, 0x0800, 0x0400, 0x0000, 0xFFFF, },
+{ 0x5000, 0xE000, 0xC000, 0xA000, 0x4000, 0x3000, 0x8000, 0x6000, 0x2000, 0x1800, 0x1000, 0x0400, 0x0800, 0x0000, 0xFFFF, 0xFFFF, },
+{ 0x1800, 0xE000, 0x5000, 0x4000, 0xC000, 0xA000, 0x8000, 0x3000, 0x6000, 0x2000, 0x1000, 0x0800, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x5000, 0x4000, 0x3000, 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x2000, 0x0800, 0x1000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0400, 0x0800, 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x4000, 0x1000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0400, 0x0800, 0xA000, 0x8000, 0x6000, 0xC000, 0x4000, 0x1000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0400, 0x1000, 0x0800, 0x6000, 0xC000, 0x8000, 0x4000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0400, 0x0000, 0x1000, 0xC000, 0x8000, 0x2000, 0x4000, 0x0800, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0800, 0x0000, 0x2000, 0xC000, 0x8000, 0x4000, 0x1000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0000, 0x1000, 0x2000, 0x4000, 0x8000, 0x6000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0000, 0x1000, 0x4000, 0x8000, 0x2000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0000, 0x2000, 0x8000, 0x4000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0000, 0x4000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x0000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, }
+ };
+ // One mask per codeword with the top `length` bits set; 0x0000 marks a
+ // slot with no codeword.
+ __declspec(align(32)) static const uint16_t masktab[TOTRUN_NUM][16] =
+ {
+{ 0x8000, 0xE000, 0xE000, 0xF000, 0xF000, 0xF800, 0xF800, 0xFC00, 0xFC00, 0xFE00, 0xFE00, 0xFF00, 0xFF00, 0xFF80, 0xFF80, 0xFF80, },
+{ 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF000, 0xF000, 0xF000, 0xF800, 0xF800, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0x0000, },
+{ 0xF000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF000, 0xE000, 0xE000, 0xF000, 0xF800, 0xF800, 0xFC00, 0xF800, 0xFC00, 0x0000, 0x0000, },
+{ 0xF800, 0xE000, 0xF000, 0xF000, 0xE000, 0xE000, 0xE000, 0xF000, 0xE000, 0xF000, 0xF800, 0xF800, 0xF800, 0x0000, 0x0000, 0x0000, },
+{ 0xF000, 0xF000, 0xF000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF800, 0xF000, 0xF800, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xFC00, 0xF800, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xE000, 0xFC00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xFC00, 0xF800, 0xE000, 0xE000, 0xE000, 0xC000, 0xE000, 0xF000, 0xE000, 0xFC00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xFC00, 0xF000, 0xF800, 0xE000, 0xC000, 0xC000, 0xE000, 0xE000, 0xFC00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xFC00, 0xFC00, 0xF000, 0xC000, 0xC000, 0xE000, 0xC000, 0xF800, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xF800, 0xF800, 0xE000, 0xC000, 0xC000, 0xC000, 0xF000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xF000, 0xF000, 0xE000, 0xE000, 0x8000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xF000, 0xF000, 0xC000, 0x8000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xE000, 0xE000, 0x8000, 0xC000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xC000, 0xC000, 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0x8000, 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+};
+
+
+ // Hand the row selected by vlcnum from all three tables to the lookup helper.
+ return opt_code_from_bitstream_2d_16_1(currStream, lentab[vlcnum], codtab[vlcnum], masktab[vlcnum]);
+}
+
+/*!
+************************************************************************
+* \brief
+* read Total Zeros Chroma DC codeword from UVLC-partition
+*
+* \param p_Vid
+* video parameters; active_sps->chroma_format_idc picks the table set
+* \param currStream
+* bitstream passed through to the table-lookup helper
+* \param vlcnum
+* row index within the selected table set; not range-checked here
+* \return
+* whatever code_from_bitstream_2d_16_1 returns for the selected row
+************************************************************************
+*/
+int readSyntaxElement_TotalZerosChromaDC(VideoParameters *p_Vid, Bitstream *currStream, int vlcnum)
+{
+ // Codeword length in bits, indexed [chroma format][vlcnum][entry].
+ // Missing rows and entries are zero-filled by the compiler.
+ static const byte lentab[3][TOTRUN_NUM][16] =
+ {
+ //YUV420
+ {{ 1,2,3,3},
+ { 1,2,2},
+ { 1,1}},
+ //YUV422
+ {{ 1,3,3,4,4,4,5,5},
+ { 3,2,3,3,3,3,3},
+ { 3,3,2,2,3,3},
+ { 3,2,2,2,3},
+ { 2,2,2,2},
+ { 2,2,1},
+ { 1,1}},
+ //YUV444
+ {{ 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
+ { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
+ { 4,3,3,3,4,4,3,3,4,5,5,6,5,6},
+ { 5,3,4,4,3,3,3,4,3,4,5,5,5},
+ { 4,4,4,3,3,3,3,3,4,5,4,5},
+ { 6,5,3,3,3,3,3,3,4,3,6},
+ { 6,5,3,3,3,2,3,4,3,6},
+ { 6,4,5,3,2,2,3,3,6},
+ { 6,6,4,2,2,3,2,5},
+ { 5,5,3,2,2,2,4},
+ { 4,4,3,3,1,3},
+ { 4,4,2,1,3},
+ { 3,3,1,2},
+ { 2,2,1},
+ { 1,1}}
+ };
+
+ // Code values stored as plain (right-aligned) integers, in the same
+ // layout as lentab; this function uses no mask table.
+ static const byte codtab[3][TOTRUN_NUM][16] =
+ {
+ //YUV420
+ {{ 1,1,1,0},
+ { 1,1,0},
+ { 1,0}},
+ //YUV422
+ {{ 1,2,3,2,3,1,1,0},
+ { 0,1,1,4,5,6,7},
+ { 0,1,1,2,6,7},
+ { 6,0,1,2,7},
+ { 0,1,2,3},
+ { 0,1,1},
+ { 0,1}},
+ //YUV444
+ {{1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
+ {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
+ {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
+ {3,7,5,4,6,5,4,3,3,2,2,1,0},
+ {5,4,3,7,6,5,4,3,2,1,1,0},
+ {1,1,7,6,5,4,3,2,1,1,0},
+ {1,1,5,4,3,3,2,1,1,0},
+ {1,1,1,3,3,2,2,1,0},
+ {1,0,1,3,2,1,1,1,},
+ {1,0,1,3,2,1,1,},
+ {0,1,1,2,1,3},
+ {0,1,1,1,1},
+ {0,1,1,1},
+ {0,1,1},
+ {0,1}}
+ };
+
+ // chroma_format_idc 1/2/3 maps to table set 0/1/2.
+ // NOTE(review): yuv is used as an array index without a range check, so
+ // chroma_format_idc == 0 would index lentab[-1] - presumably callers never
+ // get here for monochrome; verify.
+ int yuv = p_Vid->active_sps->chroma_format_idc - 1;
+ return code_from_bitstream_2d_16_1(currStream, &lentab[yuv][vlcnum][0], &codtab[yuv][vlcnum][0]);
+}
+
+
+/*!
+************************************************************************
+* \brief
+* read Run codeword from UVLC-partition
+*
+* \param currStream
+* bitstream passed through to the table-lookup helper
+* \param vlcnum
+* row index into lentab/codtab/masktab; not range-checked here
+* \return
+* whatever opt_code_from_bitstream_2d_16_1 returns for the selected row
+************************************************************************
+*/
+int readSyntaxElement_Run(Bitstream *currStream, int vlcnum)
+{
+ // Codeword length in bits for each entry of a row; entries without an
+ // initializer are zero-filled by the compiler.
+ // NOTE(review): all three tables are declared with TOTRUN_NUM rows but
+ // only 7 rows are initialized. Any further rows are all-zero (length 0,
+ // code 0x0000, mask 0x0000) - presumably callers keep vlcnum in 0..6; verify.
+ __declspec(align(32)) static const uint16_t lentab[TOTRUN_NUM][16] =
+ {
+ {1,1},
+ {1,2,2},
+ {2,2,2,2},
+ {2,2,2,3,3},
+ {2,2,3,3,3,3},
+ {2,3,3,3,3,3,3},
+ {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
+ };
+ // The commented-out table below holds the original right-aligned code
+ // values; the active codtab stores the same codes shifted left by
+ // (16 - length).
+/*
+ static const byte codtab[TOTRUN_NUM][16] =
+ {
+ {1,0},
+ {1,1,0},
+ {3,2,1,0},
+ {3,2,1,1,0},
+ {3,2,3,2,1,0},
+ {3,0,1,3,2,5,4},
+ {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
+ };*/
+
+ // Codewords left-aligned in 16 bits; 0xFFFF fills the slots of an
+ // initialized row that have no codeword (their mask is 0x0000).
+ __declspec(align(32)) static const uint16_t codtab[TOTRUN_NUM][16] =
+ {
+{ 0x8000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0x8000, 0x4000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0xC000, 0x8000, 0x4000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0xC000, 0x8000, 0x4000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0xC000, 0x8000, 0x6000, 0x4000, 0x2000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0xC000, 0x0000, 0x2000, 0x6000, 0x4000, 0xA000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, },
+{ 0xE000, 0xC000, 0xA000, 0x8000, 0x6000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0xFFFF, }
+ };
+ // One mask per codeword with the top `length` bits set; 0x0000 marks a
+ // slot with no codeword.
+ __declspec(align(32)) static const uint16_t masktab[TOTRUN_NUM][16] =
+{
+{ 0x8000, 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0x8000, 0xC000, 0xC000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xC000, 0xC000, 0xC000, 0xC000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xC000, 0xC000, 0xC000, 0xE000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xC000, 0xC000, 0xE000, 0xE000, 0xE000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xC000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, },
+{ 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0, 0x0000, }
+ };
+ // Hand the row selected by vlcnum from all three tables to the lookup helper.
+ return opt_code_from_bitstream_2d_16_1(currStream, lentab[vlcnum], codtab[vlcnum], masktab[vlcnum]);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Reads bits from the bitstream buffer
+*
+* \param buffer
+*    containing VLC-coded data bits
+* \param totbitoffset
+*    bit offset from start of partition
+* \param info
+*    returns value of the read bits (MSB first)
+* \param bitcount
+*    size of the bitstream; NOT used by this implementation, which
+*    performs no bounds check
+* \param numbits
+*    number of bits to read. Four bytes are always loaded starting at the
+*    byte containing totbitoffset, so the buffer must be readable up to
+*    3 bytes beyond the last requested bit, and at most 25 bits can be
+*    delivered for an arbitrary bit alignment (32 when byte aligned).
+*
+* \return
+*    numbits, or 0 (with *info set to 0) when numbits <= 0
+************************************************************************
+*/
+
+int GetBits (const uint8_t buffer[],int totbitoffset,int *info, int bitcount,
+             int numbits)
+{
+  const int bitoffset = totbitoffset & 0x07;          // bit from start of byte
+  const uint8_t *ptr = &buffer[totbitoffset >> 3];    // byte from start of buffer
+  uint32_t window;
+
+  (void)bitcount;   // kept for interface compatibility only
+
+  // A zero-width read would shift by 32 below, which is undefined.
+  if (numbits <= 0)
+  {
+    *info = 0;
+    return 0;
+  }
+
+  // Assemble a big-endian 32-bit window. Each byte is widened to uint32_t
+  // first: shifting a promoted (signed) int left by 24 overflows when the
+  // byte is >= 0x80.
+  window = ((uint32_t)ptr[0] << 24) | ((uint32_t)ptr[1] << 16)
+         | ((uint32_t)ptr[2] << 8)  |  (uint32_t)ptr[3];
+  window <<= bitoffset;        // drop the bits preceding totbitoffset
+  window >>= 32 - numbits;     // keep the top numbits bits, right-aligned
+  *info = (int)window;
+  return numbits;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Reads bits from the bitstream buffer
+*
+* \param buffer
+*    buffer containing VLC-coded data bits
+* \param totbitoffset
+*    bit offset from start of partition
+* \param bitcount
+*    size of the bitstream; NOT used by this implementation, which
+*    performs no bounds check
+* \param numbits
+*    number of bits to read. Four bytes are always loaded starting at the
+*    byte containing totbitoffset, so the buffer must be readable up to
+*    3 bytes beyond the last requested bit, and at most 25 bits can be
+*    delivered for an arbitrary bit alignment (32 when byte aligned).
+*
+* \return
+*    the requested bits, MSB first and right-aligned; 0 when numbits <= 0
+************************************************************************
+*/
+
+static int ShowBits (const uint8_t buffer[],int totbitoffset,int bitcount, int numbits)
+{
+  const int bitoffset = totbitoffset & 0x07;          // bit from start of byte
+  const uint8_t *ptr = &buffer[totbitoffset >> 3];    // byte from start of buffer
+  uint32_t window;
+
+  (void)bitcount;   // kept for interface compatibility only
+
+  // A zero-width read would shift by 32 below, which is undefined.
+  if (numbits <= 0)
+    return 0;
+
+  // Assemble a big-endian 32-bit window. Each byte is widened to uint32_t
+  // first: shifting a promoted (signed) int left by 24 overflows when the
+  // byte is >= 0x80.
+  window = ((uint32_t)ptr[0] << 24) | ((uint32_t)ptr[1] << 16)
+         | ((uint32_t)ptr[2] << 8)  |  (uint32_t)ptr[3];
+  window <<= bitoffset;        // drop the bits preceding totbitoffset
+  window >>= 32 - numbits;     // keep the top numbits bits, right-aligned
+  return (int)window;
+}
+
diff --git a/Src/h264dec/ldecod_vc9.vcxproj b/Src/h264dec/ldecod_vc9.vcxproj
new file mode 100644
index 00000000..ad81b0c3
--- /dev/null
+++ b/Src/h264dec/ldecod_vc9.vcxproj
@@ -0,0 +1,487 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release IPP|Win32">
+ <Configuration>Release IPP</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release IPP|x64">
+ <Configuration>Release IPP</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>17.0</VCProjectVersion>
+ <ProjectName>h264dec</ProjectName>
+ <ProjectGuid>{5499B067-CF32-4141-A757-E0A29866994A}</ProjectGuid>
+ <RootNamespace>ldecod</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">
+ <OutDir>$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Midl>
+ <TypeLibraryName>.\ldecod/Debug_vc8/ldecod.tlb</TypeLibraryName>
+ <HeaderFileName />
+ </Midl>
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <BrowseInformation>true</BrowseInformation>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <Midl>
+ <TypeLibraryName>.\ldecod/Debug_vc8/ldecod.tlb</TypeLibraryName>
+ <HeaderFileName>
+ </HeaderFileName>
+ </Midl>
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <BrowseInformation>true</BrowseInformation>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Midl>
+ <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName>
+ <HeaderFileName />
+ </Midl>
+ <ClCompile>
+ <Optimization>Full</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <ExceptionHandling />
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <Midl>
+ <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName>
+ <HeaderFileName>
+ </HeaderFileName>
+ </Midl>
+ <ClCompile>
+ <Optimization>Full</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">
+ <Midl>
+ <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName>
+ <HeaderFileName />
+ </Midl>
+ <ClCompile>
+ <Optimization>Full</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <ExceptionHandling />
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4101;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">
+ <Midl>
+ <TypeLibraryName>.\ldecod\Release_vc8/ldecod.tlb</TypeLibraryName>
+ <HeaderFileName>
+ </HeaderFileName>
+ </Midl>
+ <ClCompile>
+ <Optimization>Full</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <AdditionalIncludeDirectories>..\external_dependencies\intel_ipp_6.1.1.035\ia32\include;ldecod/inc;lcommon/inc;../Wasabi;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;H264_IPP;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4101;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="ldecod\src\biari.asm"> <!-- assembled with ml into biarix86.obj; excluded from the Debug builds; %(FullPath) is quoted so paths with spaces survive -->
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)biarix86.obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)biarix86.obj;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">$(IntDir)biarix86.obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">$(IntDir)biarix86.obj;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /c /nologo /Fo"$(IntDir)biarix86.obj" /Zi "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)biarix86.obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)biarix86.obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ <CustomBuild Include="ldecod\src\macroblock.asm">
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">ml /c /nologo /Fo"$(IntDir)macroblockx86.obj" /Zi "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">ml /c /nologo /Fo"$(IntDir)macroblockx86.obj" /Zi "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">$(IntDir)macroblockx86.obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">$(IntDir)macroblockx86.obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ <CustomBuild Include="ldecod\src\prediction.asm">
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">ml /c /nologo /Fo"$(IntDir)predictionx86.obj" /Zi "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">ml /c /nologo /Fo"$(IntDir)predictionx86.obj" /Zi "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|Win32'">$(IntDir)predictionx86.obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release IPP|x64'">$(IntDir)predictionx86.obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="dec_api.c" />
+ <ClCompile Include="lcommon\src\memalloc.c" />
+ <ClCompile Include="lcommon\src\memcache.c" />
+ <ClCompile Include="lcommon\src\mv_prediction.c" />
+ <ClCompile Include="lcommon\src\parsetcommon.c" />
+ <ClCompile Include="lcommon\src\transform.c" />
+ <ClCompile Include="lcommon\src\win32.c" />
+ <ClCompile Include="ldecod\src\biaridecod.c" />
+ <ClCompile Include="ldecod\src\block.c" />
+ <ClCompile Include="ldecod\src\cabac.c" />
+ <ClCompile Include="ldecod\src\context_ini.c" />
+ <ClCompile Include="ldecod\src\erc_api.c" />
+ <ClCompile Include="ldecod\src\erc_do_i.c" />
+ <ClCompile Include="ldecod\src\erc_do_p.c" />
+ <ClCompile Include="ldecod\src\errorconcealment.c" />
+ <ClCompile Include="ldecod\src\filter_chroma_horiz.c" />
+ <ClCompile Include="ldecod\src\filter_chroma_vert.c" />
+ <ClCompile Include="ldecod\src\filter_luma_horiz.c" />
+ <ClCompile Include="ldecod\src\filter_luma_vert.c" />
+ <ClCompile Include="ldecod\src\fmo.c" />
+ <ClCompile Include="ldecod\src\header.c" />
+ <ClCompile Include="ldecod\src\image.c" />
+ <ClCompile Include="ldecod\src\intra16x16_pred.c" />
+ <ClCompile Include="ldecod\src\intra4x4_pred.c" />
+ <ClCompile Include="ldecod\src\intra8x8_pred.c" />
+ <ClCompile Include="ldecod\src\intra_chroma_pred.c" />
+ <ClCompile Include="ldecod\src\ldecod.c" />
+ <ClCompile Include="ldecod\src\loopFilter.c" />
+ <ClCompile Include="ldecod\src\macroblock.c" />
+ <ClCompile Include="ldecod\src\mbuffer.c" />
+ <ClCompile Include="ldecod\src\mb_access.c" />
+ <ClCompile Include="ldecod\src\mb_prediction.c" />
+ <ClCompile Include="ldecod\src\mc_prediction.c" />
+ <ClCompile Include="ldecod\src\meminput.c" />
+ <ClCompile Include="ldecod\src\nal.c" />
+ <ClCompile Include="ldecod\src\nalu.c" />
+ <ClCompile Include="ldecod\src\nalucommon.c" />
+ <ClCompile Include="ldecod\src\output.c" />
+ <ClCompile Include="ldecod\src\parset.c" />
+ <ClCompile Include="ldecod\src\quant.c" />
+ <ClCompile Include="ldecod\src\sei.c" />
+ <ClCompile Include="ldecod\src\storable_picture.c" />
+ <ClCompile Include="ldecod\src\strength_horiz.c" />
+ <ClCompile Include="ldecod\src\strength_vert.c" />
+ <ClCompile Include="ldecod\src\transform8x8.c" />
+ <ClCompile Include="ldecod\src\vlc.c" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="dec_api.h" />
+ <ClInclude Include="lcommon\inc\ctx_tables.h" />
+ <ClInclude Include="lcommon\inc\frame.h" />
+ <ClInclude Include="lcommon\inc\ifunctions.h" />
+ <ClInclude Include="lcommon\inc\mb_access.h" />
+ <ClInclude Include="lcommon\inc\memalloc.h" />
+ <ClInclude Include="lcommon\inc\memcache.h" />
+ <ClInclude Include="lcommon\inc\mv_prediction.h" />
+ <ClInclude Include="lcommon\inc\nalucommon.h" />
+ <ClInclude Include="lcommon\inc\transform.h" />
+ <ClInclude Include="lcommon\inc\types.h" />
+ <ClInclude Include="lcommon\inc\win32.h" />
+ <ClInclude Include="ldecod\inc\biaridecod.h" />
+ <ClInclude Include="ldecod\inc\block.h" />
+ <ClInclude Include="ldecod\inc\cabac.h" />
+ <ClInclude Include="ldecod\inc\context_ini.h" />
+ <ClInclude Include="ldecod\inc\contributors.h" />
+ <ClInclude Include="ldecod\inc\defines.h" />
+ <ClInclude Include="ldecod\inc\elements.h" />
+ <ClInclude Include="ldecod\inc\erc_api.h" />
+ <ClInclude Include="ldecod\inc\erc_do.h" />
+ <ClInclude Include="ldecod\inc\erc_globals.h" />
+ <ClInclude Include="ldecod\inc\errorconcealment.h" />
+ <ClInclude Include="ldecod\inc\fmo.h" />
+ <ClInclude Include="ldecod\inc\global.h" />
+ <ClInclude Include="ldecod\inc\header.h" />
+ <ClInclude Include="ldecod\inc\image.h" />
+ <ClInclude Include="ldecod\inc\intra16x16_pred.h" />
+ <ClInclude Include="ldecod\inc\intra4x4_pred.h" />
+ <ClInclude Include="ldecod\inc\intra8x8_pred.h" />
+ <ClInclude Include="ldecod\inc\leaky_bucket.h" />
+ <ClInclude Include="ldecod\inc\loopfilter.h" />
+ <ClInclude Include="ldecod\inc\macroblock.h" />
+ <ClInclude Include="ldecod\inc\mbuffer.h" />
+ <ClInclude Include="ldecod\inc\mc_prediction.h" />
+ <ClInclude Include="ldecod\inc\meminput.h" />
+ <ClInclude Include="ldecod\inc\nalu.h" />
+ <ClInclude Include="ldecod\inc\optim.h" />
+ <ClInclude Include="ldecod\inc\output.h" />
+ <ClInclude Include="ldecod\inc\parset.h" />
+ <ClInclude Include="ldecod\inc\parsetcommon.h" />
+ <ClInclude Include="ldecod\inc\quant.h" />
+ <ClInclude Include="ldecod\inc\sei.h" />
+ <ClInclude Include="ldecod\inc\transform8x8.h" />
+ <ClInclude Include="ldecod\inc\vlc.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/h264dec/ldecod_vc9.vcxproj.filters b/Src/h264dec/ldecod_vc9.vcxproj.filters
new file mode 100644
index 00000000..00bcc7d3
--- /dev/null
+++ b/Src/h264dec/ldecod_vc9.vcxproj.filters
@@ -0,0 +1,304 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{303af6ae-839c-47f8-9a67-adb97270c1cc}</UniqueIdentifier>
+ <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+ </Filter>
+ <Filter Include="Source Files\deblocking filter">
+ <UniqueIdentifier>{6a99fb4f-1595-4387-a229-fb983e10ee1d}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{3b640415-33d1-4075-9c43-c885e4ab8760}</UniqueIdentifier>
+ <Extensions>h;hpp;hxx;hm;inl</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="ldecod\src\biaridecod.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\block.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\cabac.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\context_ini.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="dec_api.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\erc_api.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\erc_do_i.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\erc_do_p.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\errorconcealment.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\fmo.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\header.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\image.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\intra16x16_pred.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\intra4x4_pred.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\intra8x8_pred.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\intra_chroma_pred.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\ldecod.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\macroblock.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\mb_access.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\mb_prediction.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\mbuffer.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\mc_prediction.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="lcommon\src\memalloc.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="lcommon\src\memcache.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\meminput.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="lcommon\src\mv_prediction.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\nal.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\nalu.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\nalucommon.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\output.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\parset.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="lcommon\src\parsetcommon.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\quant.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\sei.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\storable_picture.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="lcommon\src\transform.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\transform8x8.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\vlc.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="lcommon\src\win32.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\filter_chroma_horiz.c">
+ <Filter>Source Files\deblocking filter</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\filter_chroma_vert.c">
+ <Filter>Source Files\deblocking filter</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\filter_luma_horiz.c">
+ <Filter>Source Files\deblocking filter</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\filter_luma_vert.c">
+ <Filter>Source Files\deblocking filter</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\loopFilter.c">
+ <Filter>Source Files\deblocking filter</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\strength_horiz.c">
+ <Filter>Source Files\deblocking filter</Filter>
+ </ClCompile>
+ <ClCompile Include="ldecod\src\strength_vert.c">
+ <Filter>Source Files\deblocking filter</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="ldecod\inc\biaridecod.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\block.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\cabac.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\context_ini.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\contributors.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\ctx_tables.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="dec_api.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\defines.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\elements.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\erc_api.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\erc_do.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\erc_globals.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\errorconcealment.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\fmo.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\frame.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\global.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\header.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\ifunctions.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\image.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\intra16x16_pred.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\intra4x4_pred.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\intra8x8_pred.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\leaky_bucket.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\loopfilter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\macroblock.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\mb_access.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\mbuffer.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\mc_prediction.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\memalloc.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\memcache.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\meminput.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\mv_prediction.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\nalu.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\nalucommon.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\optim.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\output.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\parset.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\parsetcommon.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\quant.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\sei.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\transform.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\transform8x8.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\types.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ldecod\inc\vlc.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="lcommon\inc\win32.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="ldecod\src\biari.asm">
+ <Filter>Source Files</Filter>
+ </CustomBuild>
+ <CustomBuild Include="ldecod\src\macroblock.asm">
+ <Filter>Source Files</Filter>
+ </CustomBuild>
+ <CustomBuild Include="ldecod\src\prediction.asm">
+ <Filter>Source Files</Filter>
+ </CustomBuild>
+ </ItemGroup>
+</Project> \ No newline at end of file