aboutsummaryrefslogtreecommitdiff
path: root/Src/h264dec/lcommon/src
diff options
context:
space:
mode:
Diffstat (limited to 'Src/h264dec/lcommon/src')
-rw-r--r--Src/h264dec/lcommon/src/img_io.c327
-rw-r--r--Src/h264dec/lcommon/src/memalloc.c1280
-rw-r--r--Src/h264dec/lcommon/src/memcache.c106
-rw-r--r--Src/h264dec/lcommon/src/mv_prediction.c250
-rw-r--r--Src/h264dec/lcommon/src/parsetcommon.c244
-rw-r--r--Src/h264dec/lcommon/src/transform.c809
-rw-r--r--Src/h264dec/lcommon/src/win32.c67
7 files changed, 3083 insertions, 0 deletions
diff --git a/Src/h264dec/lcommon/src/img_io.c b/Src/h264dec/lcommon/src/img_io.c
new file mode 100644
index 00000000..c0520218
--- /dev/null
+++ b/Src/h264dec/lcommon/src/img_io.c
@@ -0,0 +1,327 @@
+
+/*!
+ *************************************************************************************
+ * \file img_io.c
+ *
+ * \brief
+ * image I/O related functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ *************************************************************************************
+ */
+#include "contributors.h"
+#include "global.h"
+#include "img_io.h"
+#include "report.h"
+
+// Table of named picture formats: each entry gives a format name followed by
+// two integers that ParseSizeFromString() reads back as x_size and y_size.
+// The entry whose name is NULL terminates the table.
+// NOTE(review): "qqvga" is listed as 160x128 here; QQVGA is commonly 160x120 - confirm.
+static const VIDEO_SIZE VideoRes[] = {
+ { "qcif" , 176, 144},
+ { "qqvga" , 160, 128},
+ { "qvga" , 320, 240},
+ { "sif" , 352, 240},
+ { "cif" , 352, 288},
+ { "vga" , 640, 480},
+ { "sd1" , 720, 480},
+ { "sd2" , 704, 576},
+ { "sd3" , 720, 576},
+ { "720p" , 1280, 720},
+ { "1080p" , 1920, 1080},
+ { NULL, 0, 0}
+};
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Case-insensitive (ASCII only) test whether needle occurs in haystack
+ *
+ * \return
+ *    1 if needle is found anywhere inside haystack, 0 otherwise
+ ************************************************************************
+ */
+static int name_contains_nocase (const char *haystack, const char *needle)
+{
+  for (; *haystack != '\0'; haystack++)
+  {
+    const char *h = haystack;
+    const char *n = needle;
+
+    while (*n != '\0')
+    {
+      char a = *h;
+      char b = *n;
+
+      if (a >= 'A' && a <= 'Z')
+        a = (char) (a - 'A' + 'a');
+      if (b >= 'A' && b <= 'Z')
+        b = (char) (b - 'A' + 'a');
+      if (a != b)
+        break;
+      h++;
+      n++;
+    }
+
+    if (*n == '\0')
+      return 1;
+  }
+
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Parse picture size (and, if present, frame rate) from the file name
+ *
+ *    The name is scanned for a token of the form "_<width>x<height>"
+ *    that is terminated by '_' or '.', optionally followed by
+ *    "<fps>i" or "<fps>p" (for example "seq_352x288_30p.yuv").
+ *    When no such token exists, the name is searched for one of the
+ *    format names listed in VideoRes (for example "seq_cif.yuv").
+ *
+ * \param input_file
+ *    only fname is used; it is patched temporarily while parsing and
+ *    restored before returning
+ * \param x_size
+ *    receives the width, or -1 when no size could be determined
+ * \param y_size
+ *    receives the height, or -1 when no size could be determined
+ * \param fps
+ *    receives the frame rate; left untouched when the name carries none
+ *
+ * \return
+ *    1 if a size was determined, 0 otherwise
+ ************************************************************************
+ */
+int ParseSizeFromString (VideoDataFile *input_file, int *x_size, int *y_size, double *fps)
+{
+  char *fn = input_file->fname;
+  char *p1 = fn, *p2 = fn, *tail;
+  char saved;
+  int  i, rejected;
+
+  *x_size = *y_size = -1;
+
+  while (p1 != NULL && p2 != NULL)
+  {
+    long   width, height;
+    double rate;
+
+    // A size token starts at an '_' ...
+    p1 = strchr( p1, '_');
+    if (p1 == NULL)
+      break;
+
+    // ... and its width runs up to the next 'x'
+    p2 = strchr( p1, 'x');
+    if (p2 == NULL)
+      break;
+
+    // Cut the string at 'x' so that strtol only sees the width candidate
+    *p2 = '\0';
+    width = strtol( p1 + 1, &tail, 10);
+    // Reject an empty candidate or one with trailing non-digits
+    rejected = (*tail != '\0' || *(p1 + 1) == '\0');
+    *p2 = 'x';
+    if (rejected)
+    {
+      p1 = tail;
+      continue;
+    }
+
+    // The height ends at the first '_' or '.' after the 'x'
+    p1 = strpbrk( p2 + 1, "_.");
+    if (p1 == NULL)
+    {
+      p1 = p2 + 1;
+      continue;
+    }
+
+    saved = *p1;
+    *p1 = '\0';
+    height = strtol( p2 + 1, &tail, 10);
+    rejected = (*tail != '\0' || *(p2 + 1) == '\0');
+    *p1 = saved;
+    if (rejected)
+    {
+      p1 = tail;
+      continue;
+    }
+
+    // Both numbers converted cleanly: only now publish them, so that a
+    // half-parsed token can never leak into the result
+    *x_size = (int) width;
+    *y_size = (int) height;
+
+    // Optional frame rate: a number ended by 'i' or 'p'.
+    // strpbrk (not strstr) is needed to look for either character.
+    p2 = strpbrk( p1 + 1, "ip");
+    if (p2 == NULL)
+      break;
+
+    saved = *p2;
+    *p2 = '\0';
+    rate = strtod( p1 + 1, &tail);
+    rejected = (*tail != '\0' || *(p1 + 1) == '\0');
+    *p2 = saved;
+    if (rejected)
+    {
+      p1 = tail;
+      continue;
+    }
+
+    *fps = rate;
+    break;
+  }
+
+  // Fall back to the named formats only if no explicit size was parsed,
+  // so that a valid "_WxH" token is never overwritten
+  if (*x_size == -1 || *y_size == -1)
+  {
+    for (i = 0; VideoRes[i].name != NULL; i++)
+    {
+      if (name_contains_nocase (fn, VideoRes[i].name))
+      {
+        *x_size = VideoRes[i].x_size;
+        *y_size = VideoRes[i].y_size;
+        // Should add frame rate support as well
+        break;
+      }
+    }
+  }
+
+  return (*x_size == -1 || *y_size == -1) ? 0 : 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Parse a printf-style frame number field ("%<digits>d") from the
+ *    file name
+ *
+ *    On success the text before '%' is stored in fhead, the text after
+ *    'd' in ftail, the field width in num_digits and zero_pad is set
+ *    when the width starts with '0'. If no valid field is found,
+ *    num_digits stays -1 and zero_pad stays 0.
+ *    is_concatenated is cleared for VIDEO_TIFF input; otherwise it is
+ *    set exactly when no frame number field was found.
+ *
+ * \param input_file
+ *    fname and vdtype are read; fhead, ftail, zero_pad, num_digits and
+ *    is_concatenated are written. fname is patched temporarily while
+ *    parsing and restored before returning.
+ ************************************************************************
+ */
+void ParseFrameNoFormatFromString (VideoDataFile *input_file)
+{
+  char *fn    = input_file->fname;
+  char *fhead = input_file->fhead;
+  char *ftail = input_file->ftail;
+  char *p1 = fn, *p2 = fn, *tail;
+
+  input_file->zero_pad   = 0;
+  input_file->num_digits = -1;
+
+  while (p1 != NULL && p2 != NULL)
+  {
+    size_t head_len;
+    long   digits;
+    int    rejected;
+
+    // Search for the start of a format field
+    p1 = strchr( p1, '%');
+    if (p1 == NULL)
+      break;
+
+    // Everything before '%' is the head; terminate it explicitly because
+    // a length-limited copy does not add the trailing NUL
+    head_len = (size_t) (p1 - fn);
+    memcpy(fhead, fn, head_len);
+    fhead[head_len] = '\0';
+
+    // Search for the conversion character that ends the field
+    p2 = strchr( p1, 'd');
+    if (p2 == NULL)
+      break;
+
+    // Cut the string at 'd' so that strtol only sees the width candidate
+    *p2 = '\0';
+    digits = strtol( p1 + 1, &tail, 10);
+    // Reject an empty width or one with trailing non-digits
+    rejected = (*tail != '\0' || *(p1 + 1) == '\0');
+    *p2 = 'd';
+    if (rejected)
+    {
+      p1 = tail;
+      continue;
+    }
+
+    // Publish the result only for a field that parsed cleanly
+    input_file->zero_pad   = (*(p1 + 1) == '0') ? 1 : 0;
+    input_file->num_digits = (int) digits;
+
+    // Everything after 'd' is the tail (copied including its terminator)
+    strcpy(ftail, p2 + 1);
+    break;
+  }
+
+  if (input_file->vdtype == VIDEO_TIFF)
+  {
+    input_file->is_concatenated = 0;
+  }
+  else
+    input_file->is_concatenated = (input_file->num_digits == -1) ? 1 : 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Open file containing a single frame
+ *
+ *    The file name is assembled as <fhead><frame number><ftail>, where
+ *    the frame number is printed with a minimum width of num_digits
+ *    (padded with zeros when zero_pad is set, with blanks otherwise).
+ *
+ * \param input_file
+ *    fhead, ftail, zero_pad and num_digits are read; f_num receives the
+ *    result of open() (-1 on failure)
+ * \param FrameNumberInFile
+ *    number inserted into the file name
+ ************************************************************************
+ */
+void OpenFrameFile( VideoDataFile *input_file, int FrameNumberInFile)
+{
+  char infile [FILE_NAME_SIZE];
+  int  name_len;
+
+  // Build the whole name with one bounded call; this cannot write past
+  // infile and always leaves the terminator at the real end of the string
+  if (input_file->zero_pad)
+    name_len = snprintf(infile, sizeof(infile), "%s%0*d%s", input_file->fhead, input_file->num_digits, FrameNumberInFile, input_file->ftail);
+  else
+    name_len = snprintf(infile, sizeof(infile), "%s%*d%s", input_file->fhead, input_file->num_digits, FrameNumberInFile, input_file->ftail);
+
+  if (name_len < 0 || name_len >= (int) sizeof(infile))
+  {
+    // The name did not fit; do not try to open a truncated path
+    input_file->f_num = -1;
+    printf ("OpenFrameFile: file name too long\n");
+    report_stats_on_error();
+    return;
+  }
+
+  if ((input_file->f_num = open(infile, OPENFLAGS_READ)) == -1)
+  {
+    printf ("OpenFrameFile: cannot open file %s\n", infile);
+    report_stats_on_error();
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Open the input file when the whole sequence is stored in one file
+ *
+ *    Nothing is done unless input_file->is_concatenated is 1. An empty
+ *    file name or a failing open() is reported through error().
+ *
+ * \param input_file
+ *    fname and is_concatenated are read; f_num receives the descriptor
+ ************************************************************************
+ */
+void OpenFiles( VideoDataFile *input_file)
+{
+  if (input_file->is_concatenated != 1)
+    return;
+
+  // An empty name means no sequence was configured
+  if (input_file->fname[0] == '\0')
+  {
+    snprintf(errortext, ET_SIZE, "No input sequence name was provided. Please check settings.");
+    error (errortext, 500);
+  }
+
+  input_file->f_num = open(input_file->fname, OPENFLAGS_READ);
+  if (input_file->f_num == -1)
+  {
+    snprintf(errortext, ET_SIZE, "Input file %s does not exist",input_file->fname);
+    error (errortext, 500);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Close the input file, if one is open, and mark it as closed
+ *
+ * \param input_file
+ *    f_num is closed unless it is -1 and is always reset to -1
+ ************************************************************************
+ */
+void CloseFiles(VideoDataFile *input_file)
+{
+  const int fd = input_file->f_num;
+
+  input_file->f_num = -1;
+
+  if (fd != -1)
+    close(fd);
+}
+
+/* ==========================================================================
+ *
+ * ParseVideoType
+ *
+ * Derives the video file type from the last three characters of
+ * input_file->fname (compared case-insensitively) and stores it in
+ * input_file->vdtype:
+ *   "yuv" -> VIDEO_YUV  with YUV420
+ *   "rgb" -> VIDEO_RGB  with YUV444
+ *   "tif" -> VIDEO_TIFF (yuv_format is left untouched)
+ *   "avi" -> VIDEO_AVI  (yuv_format and avi are left untouched)
+ * Any other name, including one shorter than three characters, is treated
+ * like "yuv". All branches except "avi" reset input_file->avi to NULL.
+ *
+ * Returns the selected type.
+ *
+ * ==========================================================================
+*/
+VideoFileType ParseVideoType (VideoDataFile *input_file)
+{
+  const char *name     = input_file->fname;
+  size_t      name_len = strlen(name);
+  // A name shorter than three characters has no suffix to inspect; use an
+  // empty string instead of pointing in front of the buffer
+  const char *format   = (name_len >= 3) ? name + name_len - 3 : "";
+
+  if (strcasecmp (format, "yuv") == 0)
+  {
+    input_file->vdtype = VIDEO_YUV;
+    input_file->format.yuv_format = YUV420;
+    input_file->avi = NULL;
+  }
+  else if (strcasecmp (format, "rgb") == 0)
+  {
+    input_file->vdtype = VIDEO_RGB;
+    input_file->format.yuv_format = YUV444;
+    input_file->avi = NULL;
+  }
+  else if (strcasecmp (format, "tif") == 0)
+  {
+    input_file->vdtype = VIDEO_TIFF;
+    input_file->avi = NULL;
+  }
+  else if (strcasecmp (format, "avi") == 0)
+  {
+    input_file->vdtype = VIDEO_AVI;
+  }
+  else
+  {
+    // Unknown suffixes are not an error: fall back to YUV 4:2:0
+    input_file->vdtype = VIDEO_YUV;
+    input_file->format.yuv_format = YUV420;
+    input_file->avi = NULL;
+  }
+
+  return input_file->vdtype;
+}
diff --git a/Src/h264dec/lcommon/src/memalloc.c b/Src/h264dec/lcommon/src/memalloc.c
new file mode 100644
index 00000000..da5872ed
--- /dev/null
+++ b/Src/h264dec/lcommon/src/memalloc.c
@@ -0,0 +1,1280 @@
+
+/*!
+ ************************************************************************
+ * \file memalloc.c
+ *
+ * \brief
+ * Memory allocation and free helper functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ * - Karsten Sühring <suehring@hhi.de>
+ *
+ ************************************************************************
+ */
+
+#include "global.h"
+#include "memalloc.h"
+#include "mbuffer.h"
+
+#define ROUNDUP16(size) (((size)+15) & ~15)
+
+#if !defined(USEMMX)
+/*!
+ ************************************************************************
+ * \brief
+ *    Build row-pointer tables for the top and bottom field of a frame
+ *
+ *    Two tables of dim0/2 row pointers are allocated; the top field
+ *    receives the even frame rows, the bottom field the odd ones. No
+ *    pixel data is copied. Allocation failure is reported through
+ *    no_mem_exit().
+ *
+ * \par Output:
+ *    dim0 * sizeof(imgpel*)
+ ************************************************************************/
+int init_top_bot_planes(imgpel **imgFrame, int dim0, imgpel ***imgTopField, imgpel ***imgBotField)
+{
+  const int field_rows = dim0 >> 1;
+  imgpel **frame_row = imgFrame;
+  imgpel **top, **bot;
+  int row;
+
+  top = (imgpel**) malloc(field_rows * sizeof(imgpel*));
+  *imgTopField = top;
+  if (top == NULL)
+    no_mem_exit("init_top_bot_planes: imgTopField");
+
+  bot = (imgpel**) malloc(field_rows * sizeof(imgpel*));
+  *imgBotField = bot;
+  if (bot == NULL)
+    no_mem_exit("init_top_bot_planes: imgBotField");
+
+  // Frame rows alternate: even rows go to the top field, odd rows to the bottom field
+  for (row = 0; row < field_rows; row++)
+  {
+    top[row] = *frame_row++;
+    bot[row] = *frame_row++;
+  }
+
+  return dim0 * sizeof(imgpel*);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free the top and bottom field row-pointer tables
+ *
+ *    Only the two pointer tables themselves are released; the rows
+ *    they point to are not freed here.
+ ************************************************************************/
+void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField)
+{
+  free(imgBotField);
+  free(imgTopField);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 1D memory array -> imgpel array1D[dim0]
+ *
+ *    The array is zero-initialized. Allocation failure is reported
+ *    through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int get_mem1Dpel(imgpel **array1D, int dim0)
+{
+  if((*array1D = (imgpel*)calloc(dim0, sizeof(imgpel))) == NULL)
+    no_mem_exit("get_mem1Dpel: array1D");
+
+  return (sizeof(imgpel*) + dim0 * sizeof(imgpel));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> imgpel array2D[dim0][dim1]
+ *
+ *    A table of dim0 row pointers and one zero-initialized block of
+ *    dim0*dim1 elements are allocated; row i points at element i*dim1
+ *    of that block. Allocation failure is reported through
+ *    no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int get_mem2Dpel(imgpel ***array2D, int dim0, int dim1)
+{
+  imgpel **rows;
+  int r;
+
+  rows = (imgpel**)malloc(dim0 * sizeof(imgpel*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dpel: array2D");
+
+  rows[0] = (imgpel*)calloc(dim0 * dim1, sizeof(imgpel));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dpel: array2D");
+
+  // All rows live in the single block owned by rows[0]
+  for (r = 1; r < dim0; r++)
+    rows[r] = rows[0] + r * dim1;
+
+  return dim0 * (sizeof(imgpel*) + dim1 * sizeof(imgpel));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate a VideoImage with a row-pointer table of height entries
+ *
+ *    With H264_IPP the pixel buffer comes from ippiMalloc_8u_C1 (which
+ *    also selects the stride) and has one extra row; otherwise the
+ *    stride is the width rounded up to a multiple of 16 and the buffer
+ *    is zero-initialized.
+ *
+ * \return
+ *    the new image, or 0 if any allocation failed (nothing is leaked
+ *    in that case). Release with free_memImage().
+ ************************************************************************/
+VideoImage *get_memImage(int width, int height)
+{
+  int i, stride;
+  VideoImage *image = (VideoImage *)calloc(1, sizeof(VideoImage));
+
+  if (!image)
+    return 0;
+
+  image->img = (imgpel**)malloc(height * sizeof(imgpel*));
+  if (image->img == NULL)
+  {
+    free(image);
+    return 0;
+  }
+
+#ifdef H264_IPP
+  image->base_address = (imgpel* )ippiMalloc_8u_C1(width, height+1, &stride); // height+1 so we can deal with overreading
+#else
+  stride = ROUNDUP16(width);
+  // Size computed in size_t so that stride * height cannot overflow int
+  image->base_address = (imgpel* )calloc((size_t)stride * height, sizeof(imgpel));
+#endif
+
+  if (image->base_address == NULL)
+  {
+    // Undo the partial construction instead of leaking it
+    free(image->img);
+    free(image);
+    return 0;
+  }
+
+  image->stride = stride;
+
+  for(i = 0 ; i < height; i++)
+    image->img[i] = image->base_address + stride*i;
+
+#ifdef H264_IPP
+  image->next = 0;
+#endif
+
+  return image;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a VideoImage allocated with get_memImage()
+ *
+ *    Releases the row-pointer table, the pixel buffer (with ippiFree
+ *    when H264_IPP is defined) and the image structure itself.
+ *    Passing a null pointer is a no-op.
+ ************************************************************************/
+void free_memImage(VideoImage *image)
+{
+  if (!image)
+    return;
+
+  free(image->img);
+#ifdef H264_IPP
+  ippiFree(image->base_address);
+#else
+  free(image->base_address);
+#endif
+  free(image);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> imgpel array3D[dim0][dim1][dim2]
+ *
+ *    A table of dim0 plane pointers is allocated here; the rows and
+ *    the data come from one get_mem2Dpel() call for dim0*dim1 rows of
+ *    dim2 elements. Plane i points at row i*dim1.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dpel(imgpel ****array3D, int dim0, int dim1, int dim2)
+{
+  imgpel ***planes;
+  int p;
+  int mem_size = dim0 * sizeof(imgpel**);
+
+  planes = (imgpel***)malloc(dim0 * sizeof(imgpel**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dpel: array3D");
+
+  // get_mem2Dpel stores its row table in planes[0]
+  mem_size += get_mem2Dpel(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; p++)
+    planes[p] = planes[0] + p * dim1;
+
+  return mem_size;
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 4D memory array -> imgpel array4D[dim0][dim1][dim2][dim3]
+ *
+ *    A table of dim0 pointers is allocated here; the lower levels come
+ *    from one get_mem3Dpel() call for dim0*dim1 planes. Entry i points
+ *    at plane i*dim1.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4Dpel(imgpel *****array4D, int dim0, int dim1, int dim2, int dim3)
+{
+  imgpel ****cubes;
+  int c;
+  int mem_size = dim0 * sizeof(imgpel***);
+
+  cubes = (imgpel****)malloc(dim0 * sizeof(imgpel***));
+  *array4D = cubes;
+  if (cubes == NULL)
+    no_mem_exit("get_mem4Dpel: array4D");
+
+  // get_mem3Dpel stores its plane table in cubes[0]
+  mem_size += get_mem3Dpel(cubes, dim0 * dim1, dim2, dim3);
+
+  for (c = 1; c < dim0; c++)
+    cubes[c] = cubes[0] + c * dim1;
+
+  return mem_size;
+}
+
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 1D array allocated with get_mem1Dpel()
+ *
+ *    A null pointer is reported through error() instead of being freed.
+ ************************************************************************
+ */
+void free_mem1Dpel(imgpel *array1D)
+{
+  if (array1D == NULL)
+  {
+    error ("free_mem1Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free (array1D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D array allocated with get_mem2Dpel()
+ *
+ *    Releases the data block held by the first row pointer and then the
+ *    row-pointer table. A null table or a null data block is reported
+ *    through error().
+ ************************************************************************
+ */
+void free_mem2Dpel(imgpel **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  if (array2D[0] == NULL)
+    error ("free_mem2Dpel: trying to free unused memory",100);
+  else
+    free (array2D[0]);
+
+  free (array2D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D array allocated with get_mem3Dpel()
+ *
+ *    The rows and data are released through free_mem2Dpel() on the
+ *    first plane, then the plane table is freed. A null pointer is
+ *    reported through error().
+ ************************************************************************
+ */
+void free_mem3Dpel(imgpel ***array3D)
+{
+  if (array3D == NULL)
+  {
+    error ("free_mem3Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free_mem2Dpel(array3D[0]);
+  free (array3D);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 4D array allocated with get_mem4Dpel()
+ *
+ *    The lower levels are released through free_mem3Dpel() on the
+ *    first entry, then the top-level table is freed. A null pointer is
+ *    reported through error().
+ ************************************************************************
+ */
+void free_mem4Dpel(imgpel ****array4D)
+{
+  if (array4D == NULL)
+  {
+    error ("free_mem4Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free_mem3Dpel(array4D[0]);
+  free (array4D);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 5D imgpel array
+ *
+ *    The lower levels are released through free_mem4Dpel() on the
+ *    first entry, then the top-level table is freed. A null pointer is
+ *    reported through error().
+ *    NOTE(review): the matching allocator (get_mem5Dpel) is not visible
+ *    in this part of the file - confirm it uses the same layout.
+ ************************************************************************
+ */
+void free_mem5Dpel(imgpel *****array5D)
+{
+  if (array5D == NULL)
+  {
+    error ("free_mem5Dpel: trying to free unused memory",100);
+    return;
+  }
+
+  free_mem4Dpel(array5D[0]);
+  free (array5D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> byte array2D[dim0][dim1]
+ *
+ *    A table of dim0 row pointers and one zero-initialized block of
+ *    dim0*dim1 elements are allocated; row i points at element i*dim1.
+ *    Allocation failure is reported through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int get_mem2D(byte ***array2D, int dim0, int dim1)
+{
+  byte **rows;
+  int r;
+
+  rows = (byte**)malloc(dim0 * sizeof(byte*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2D: array2D");
+
+  rows[0] = (byte*)calloc(dim0 * dim1, sizeof(byte));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2D: array2D");
+
+  for (r = 1; r < dim0; r++)
+    rows[r] = rows[0] + r * dim1;
+
+  return dim0 * (sizeof(byte*) + dim1 * sizeof(byte));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> int array2D[dim0][dim1]
+ *
+ *    A table of dim0 row pointers and one zero-initialized block of
+ *    dim0*dim1 elements are allocated; row i points at element i*dim1.
+ *    Allocation failure is reported through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dint(int ***array2D, int dim0, int dim1)
+{
+  int **rows;
+  int r;
+
+  rows = (int**)malloc(dim0 * sizeof(int*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dint: array2D");
+
+  rows[0] = (int*)calloc(dim0 * dim1, sizeof(int));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dint: array2D");
+
+  for (r = 1; r < dim0; r++)
+    rows[r] = rows[0] + r * dim1;
+
+  return dim0 * (sizeof(int*) + dim1 * sizeof(int));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> h264_ref_t array2D[dim0][dim1]
+ *
+ *    The row-pointer table comes from malloc(); the data block comes
+ *    from _aligned_malloc() with 32-byte alignment, has its size
+ *    rounded up to a multiple of 16 bytes and is cleared to zero.
+ *    Release with free_mem2Dref(). Allocation failure is reported
+ *    through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+static int get_mem2Dref(h264_ref_t ***array2D, int dim0, int dim1)
+{
+  int i;
+  size_t malloc_size = ROUNDUP16(dim0 * dim1*sizeof(h264_ref_t));
+  if((*array2D = (h264_ref_t**)malloc(dim0 * sizeof(h264_ref_t*))) == NULL)
+    no_mem_exit("get_mem2Dref: array2D");
+  if((*(*array2D) = (h264_ref_t* )_aligned_malloc(malloc_size, 32)) == NULL)
+    no_mem_exit("get_mem2Dref: array2D");
+  memset((*array2D)[0], 0, malloc_size);
+
+  for(i = 1; i < dim0; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1;
+
+  return dim0 * (sizeof(h264_ref_t*) + dim1 * sizeof(h264_ref_t));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> PicMotion array2D[dim0][dim1]
+ *
+ *    The row-pointer table has one extra entry (index dim0, set to 0)
+ *    so the motion_cache can use it as a next pointer. The data block
+ *    comes from _aligned_malloc() with 32-byte alignment and is
+ *    cleared to zero. Release with free_mem2DPicMotion(). Allocation
+ *    failure is reported through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes (the extra table entry is not counted)
+ ************************************************************************
+ */
+int get_mem2DPicMotion(PicMotion ***array2D, int dim0, int dim1)
+{
+  // we allocate with one extra position in the first dimension
+  // so the motion_cache can use it as a next pointer
+  int i;
+  size_t malloc_size = ROUNDUP16(dim0 * dim1*sizeof(PicMotion));
+  if((*array2D = (PicMotion**)malloc((dim0+1) * sizeof(PicMotion*))) == NULL)
+    no_mem_exit("get_mem2DPicMotion: array2D");
+  if((*(*array2D) = (PicMotion* )_aligned_malloc(malloc_size, 32)) == NULL)
+    no_mem_exit("get_mem2DPicMotion: array2D");
+  memset((*array2D)[0], 0, malloc_size);
+
+  for(i = 1; i < dim0; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1;
+  (*array2D)[dim0] = 0;
+
+  return dim0 * (sizeof(PicMotion*) + dim1 * sizeof(PicMotion));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> byte array3D[dim0][dim1][dim2]
+ *
+ *    A table of dim0 plane pointers is allocated here; the rows and
+ *    the data come from one get_mem2D() call for dim0*dim1 rows of
+ *    dim2 elements. Plane i points at row i*dim1.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3D(byte ****array3D, int dim0, int dim1, int dim2)
+{
+  byte ***planes;
+  int p;
+  int mem_size = dim0 * sizeof(byte**);
+
+  planes = (byte***)malloc(dim0 * sizeof(byte**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3D: array3D");
+
+  // get_mem2D stores its row table in planes[0]
+  mem_size += get_mem2D(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; p++)
+    planes[p] = planes[0] + p * dim1;
+
+  return mem_size;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 4D memory array -> byte array4D[dim0][dim1][dim2][dim3]
+ *
+ *    A table of dim0 pointers is allocated here; the lower levels come
+ *    from one get_mem3D() call for dim0*dim1 planes. Entry i points at
+ *    plane i*dim1.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4D(byte *****array4D, int dim0, int dim1, int dim2, int dim3)
+{
+  byte ****cubes;
+  int c;
+  int mem_size = dim0 * sizeof(byte***);
+
+  cubes = (byte****)malloc(dim0 * sizeof(byte***));
+  *array4D = cubes;
+  if (cubes == NULL)
+    no_mem_exit("get_mem4D: array4D");
+
+  // get_mem3D stores its plane table in cubes[0]
+  mem_size += get_mem3D(cubes, dim0 * dim1, dim2, dim3);
+
+  for (c = 1; c < dim0; c++)
+    cubes[c] = cubes[0] + c * dim1;
+
+  return mem_size;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> int array3D[dim0][dim1][dim2]
+ *
+ *    A table of dim0 plane pointers is allocated here; the rows and
+ *    the data come from one get_mem2Dint() call for dim0*dim1 rows of
+ *    dim2 elements. Plane i points at row i*dim1.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dint(int ****array3D, int dim0, int dim1, int dim2)
+{
+  int ***planes;
+  int p;
+  int mem_size = dim0 * sizeof(int**);
+
+  planes = (int***)malloc(dim0 * sizeof(int**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dint: array3D");
+
+  // get_mem2Dint stores its row table in planes[0]
+  mem_size += get_mem2Dint(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; p++)
+    planes[p] = planes[0] + p * dim1;
+
+  return mem_size;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> h264_ref_t array3D[dim0][dim1][dim2]
+ *
+ *    A table of dim0 plane pointers is allocated here; the rows and
+ *    the data come from one get_mem2Dref() call for dim0*dim1 rows of
+ *    dim2 elements. Plane i points at row i*dim1. Release with
+ *    free_mem3Dref().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dref(h264_ref_t ****array3D, int dim0, int dim1, int dim2)
+{
+  int i, mem_size = dim0 * sizeof(h264_ref_t**);
+
+  if(((*array3D) = (h264_ref_t***)malloc(dim0 * sizeof(h264_ref_t**))) == NULL)
+    no_mem_exit("get_mem3Dref: array3D");
+
+  mem_size += get_mem2Dref(*array3D, dim0 * dim1, dim2);
+
+  for(i = 1; i < dim0; i++)
+    (*array3D)[i] = (*array3D)[i-1] + dim1;
+
+  return mem_size;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 4D memory array -> int array4D[dim0][dim1][dim2][dim3]
+ *
+ *    A table of dim0 pointers is allocated here; the lower levels come
+ *    from one get_mem3Dint() call for dim0*dim1 planes. Entry i points
+ *    at plane i*dim1.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4Dint(int *****array4D, int dim0, int dim1, int dim2, int dim3)
+{
+  int ****cubes;
+  int c;
+  int mem_size = dim0 * sizeof(int***);
+
+  cubes = (int****)malloc(dim0 * sizeof(int***));
+  *array4D = cubes;
+  if (cubes == NULL)
+    no_mem_exit("get_mem4Dint: array4D");
+
+  // get_mem3Dint stores its plane table in cubes[0]
+  mem_size += get_mem3Dint(cubes, dim0 * dim1, dim2, dim3);
+
+  for (c = 1; c < dim0; c++)
+    cubes[c] = cubes[0] + c * dim1;
+
+  return mem_size;
+}
+
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D array allocated with get_mem2D()
+ *
+ *    Releases the data block held by the first row pointer and then
+ *    the row-pointer table. A null pointer is ignored.
+ ************************************************************************
+ */
+void free_mem2D(byte **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D array allocated with get_mem2Dint()
+ *
+ *    Releases the data block held by the first row pointer and then
+ *    the row-pointer table. A null pointer is ignored.
+ ************************************************************************
+ */
+void free_mem2Dint(int **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D h264_ref_t array
+ *
+ *    The data block held by the first row pointer is released with
+ *    _aligned_free(), the row-pointer table with free(). A null
+ *    pointer is ignored.
+ ************************************************************************
+ */
+void free_mem2Dref(h264_ref_t **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D PicMotion array
+ *
+ *    The data block held by the first row pointer is released with
+ *    _aligned_free(), the row-pointer table with free(). A null
+ *    pointer is ignored.
+ ************************************************************************
+ */
+void free_mem2DPicMotion(PicMotion **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D array allocated with get_mem3D()
+ *
+ *    The rows and data are released through free_mem2D() on the first
+ *    plane, then the plane table is freed. A null pointer is ignored.
+ ************************************************************************
+ */
+void free_mem3D(byte ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2D(array3D[0]);
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 4D array allocated with get_mem4D()
+ *
+ *    The lower levels are released through free_mem3D() on the first
+ *    entry, then the top-level table is freed. A null pointer is
+ *    ignored.
+ ************************************************************************
+ */
+void free_mem4D(byte ****array4D)
+{
+  if (array4D == NULL)
+    return;
+
+  free_mem3D(array4D[0]);
+  free (array4D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D array allocated with get_mem3Dint()
+ *
+ *    The rows and data are released through free_mem2Dint() on the
+ *    first plane, then the plane table is freed. A null pointer is
+ *    ignored.
+ ************************************************************************
+ */
+void free_mem3Dint(int ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2Dint(array3D[0]);
+  free (array3D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D array allocated with get_mem3Dref()
+ *
+ *    The rows and data are released through free_mem2Dref() on the
+ *    first plane, then the plane table is freed. A null pointer is
+ *    ignored.
+ ************************************************************************
+ */
+void free_mem3Dref(h264_ref_t ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2Dref(array3D[0]);
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D PicMotion array
+ *
+ *    The rows and data are released through free_mem2DPicMotion() on
+ *    the first plane, then the plane table is freed. A null pointer is
+ *    ignored.
+ ************************************************************************
+ */
+void free_mem3DPicMotion(PicMotion ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2DPicMotion(array3D[0]);
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 4D array allocated with get_mem4Dint()
+ *
+ *    The lower levels are released through free_mem3Dint() on the
+ *    first entry, then the top-level table is freed. A null pointer is
+ *    ignored.
+ ************************************************************************
+ */
+void free_mem4Dint(int ****array4D)
+{
+  if (array4D == NULL)
+    return;
+
+  free_mem3Dint(array4D[0]);
+  free (array4D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Report a failed memory allocation through error()
+ *
+ *    Formats "Could not allocate memory: <where>" into the errortext
+ *    buffer (at most ET_SIZE bytes) and passes it to error() with
+ *    code 100.
+ *    NOTE(review): the name suggests error() terminates the program;
+ *    its definition is not visible here - confirm.
+ * \param where
+ *    string indicating which memory allocation failed
+ ************************************************************************
+ */
+void no_mem_exit(char *where)
+{
+ snprintf(errortext, ET_SIZE, "Could not allocate memory: %s",where);
+ error (errortext, 100);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D uint16 memory array -> uint16 array2D[dim0][dim1]
+ *
+ *    A table of dim0 row pointers and one zero-initialized block of
+ *    dim0*dim1 elements are allocated; row i points at element i*dim1.
+ *    Allocation failure is reported through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Duint16(uint16 ***array2D, int dim0, int dim1)
+{
+  uint16 **rows;
+  int r;
+
+  rows = (uint16**)malloc(dim0 * sizeof(uint16*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Duint16: array2D");
+
+  rows[0] = (uint16*)calloc(dim0 * dim1, sizeof(uint16));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Duint16: array2D");
+
+  for (r = 1; r < dim0; r++)
+    rows[r] = rows[0] + r * dim1;
+
+  return dim0 * (sizeof(uint16*) + dim1 * sizeof(uint16));
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D short memory array -> short array2D[dim0][dim1]
+ *
+ *    The row-pointer table comes from malloc(); the data block comes
+ *    from _aligned_malloc() with 32-byte alignment, has its size
+ *    rounded up to a multiple of 16 bytes and is cleared to zero.
+ *    Release with free_mem2Dshort(). Allocation failure is reported
+ *    through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dshort(short ***array2D, int dim0, int dim1)
+{
+  const size_t data_bytes = ROUNDUP16(dim0 * dim1*sizeof(short));
+  short **rows;
+  int r;
+
+  rows = (short**)malloc(dim0 * sizeof(short*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dshort: array2D");
+
+  rows[0] = (short*)_aligned_malloc(data_bytes, 32);
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dshort: array2D");
+  memset(rows[0], 0, data_bytes);
+
+  for (r = 1; r < dim0; r++)
+    rows[r] = rows[0] + r * dim1;
+
+  return dim0 * (sizeof(short*) + dim1 * sizeof(short));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory short array -> short array3D[dim0][dim1][dim2]
+ *
+ *    A table of dim0 plane pointers is allocated here; the rows and
+ *    the data come from one get_mem2Dshort() call for dim0*dim1 rows
+ *    of dim2 elements. Plane i points at row i*dim1.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dshort(short ****array3D,int dim0, int dim1, int dim2)
+{
+  short ***planes;
+  int p;
+  int mem_size = dim0 * sizeof(short**);
+
+  planes = (short***)malloc(dim0 * sizeof(short**));
+  *array3D = planes;
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dshort: array3D");
+
+  // get_mem2Dshort stores its row table in planes[0]
+  mem_size += get_mem2Dshort(planes, dim0 * dim1, dim2);
+
+  for (p = 1; p < dim0; p++)
+    planes[p] = planes[0] + p * dim1;
+
+  return mem_size;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> MotionVector array2D[dim0][dim1]
+ *
+ *    The row-pointer table comes from malloc(); the data block comes
+ *    from _aligned_malloc() with 32-byte alignment, holds dim0*dim1
+ *    elements rounded up to a multiple of 16 elements, and is cleared
+ *    to zero.
+ *
+ * \return
+ *    the row-pointer table, or 0 if an allocation failed (nothing is
+ *    leaked in that case)
+ ************************************************************************
+ */
+static MotionVector **get_mem2DMotionVector(int dim0, int dim1)
+{
+  const size_t data_bytes = ROUNDUP16(dim0 * dim1)*sizeof(MotionVector);
+  MotionVector **rows;
+  MotionVector *data;
+  int r;
+
+  rows = (MotionVector**)malloc(dim0 * sizeof(MotionVector*));
+  if (rows == NULL)
+    return 0;
+
+  data = (MotionVector*)_aligned_malloc(data_bytes, 32);
+  rows[0] = data;
+  if (data == NULL)
+  {
+    free(rows);
+    return 0;
+  }
+  memset(data, 0, data_bytes);
+
+  for (r = 1; r < dim0; r++)
+    rows[r] = data + r * dim1;
+
+  return rows;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> MotionVector array3D[dim0][dim1][dim2]
+ *
+ *    A table of dim0 plane pointers is allocated here; the rows and
+ *    the data come from one get_mem2DMotionVector() call for dim0*dim1
+ *    rows of dim2 elements. Plane i points at row i*dim1.
+ *
+ * \return
+ *    the plane table, or 0 if an allocation failed. Release with
+ *    free_mem3DMotionVector().
+ ************************************************************************
+ */
+MotionVector ***get_mem3DMotionVector(int dim0, int dim1, int dim2)
+{
+  MotionVector ***planes;
+  MotionVector **rows;
+  int p;
+
+  planes = (MotionVector***)malloc(dim0 * sizeof(MotionVector **));
+  if (planes == NULL)
+    return 0;
+
+  rows = get_mem2DMotionVector(dim0 * dim1, dim2);
+  planes[0] = rows;
+  if (rows == NULL)
+  {
+    free(planes);
+    return 0;
+  }
+
+  for (p = 1; p < dim0; p++)
+    planes[p] = rows + p * dim1;
+
+  return planes;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D uint16 array allocated with get_mem2Duint16()
+ *
+ *    Releases the data block held by the first row pointer and then
+ *    the row-pointer table. A null pointer is ignored.
+ ************************************************************************
+ */
+void free_mem2Duint16(uint16 **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D short array allocated with get_mem2Dshort()
+ *
+ *    The data block held by the first row pointer is released with
+ *    _aligned_free(), the row-pointer table with free(). A null
+ *    pointer is ignored.
+ ************************************************************************
+ */
+void free_mem2Dshort(short **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free (array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 2D MotionVector array allocated with get_mem2DMotionVector()
+ *
+ *    The data block held by the first row pointer is released with
+ *    _aligned_free(), the row-pointer table with free(). A null
+ *    pointer is ignored.
+ ************************************************************************
+ */
+
+static void free_mem2DMotionVector(MotionVector **array2D)
+{
+  if (array2D == NULL)
+    return;
+
+  _aligned_free(array2D[0]);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free a 3D MotionVector array allocated with get_mem3DMotionVector()
+ *
+ *    The rows and data are released through free_mem2DMotionVector()
+ *    on the first plane, then the plane table is freed. A null pointer
+ *    is ignored.
+ ************************************************************************
+ */
+void free_mem3DMotionVector(MotionVector ***array3D)
+{
+  if (array3D == NULL)
+    return;
+
+  free_mem2DMotionVector(array3D[0]);
+  free (array3D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> double array2D[dim0][dim1]
+ *
+ *    A table of dim0 row pointers and one zero-initialized block of
+ *    dim0*dim1 elements are allocated; row i points at element i*dim1.
+ *    Allocation failure is reported through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Ddouble(double ***array2D, int dim0, int dim1)
+{
+  double **rows;
+  int r;
+
+  rows = (double**)malloc(dim0 * sizeof(double*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Ddouble: array2D");
+
+  rows[0] = (double*)calloc(dim0 * dim1, sizeof(double));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Ddouble: array2D");
+
+  for (r = 1; r < dim0; r++)
+    rows[r] = rows[0] + r * dim1;
+
+  return dim0 * (sizeof(double*) + dim1 * sizeof(double));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> double array2D[dim0][dim1]
+ *    Every row pointer is shifted forward by offset elements, so that
+ *    column indices from -offset to dim1 - offset - 1 address the row.
+ *
+ *    The data is one zero-initialized block of dim0*dim1 elements.
+ *    Note that the first row pointer therefore no longer equals the
+ *    address returned by calloc(). Allocation failure is reported
+ *    through no_mem_exit().
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dodouble(double ***array2D, int dim0, int dim1, int offset)
+{
+  double **rows;
+  int r;
+
+  rows = (double**)malloc(dim0 * sizeof(double*));
+  *array2D = rows;
+  if (rows == NULL)
+    no_mem_exit("get_mem2Dodouble: array2D");
+
+  rows[0] = (double*)calloc(dim0 * dim1, sizeof(double));
+  if (rows[0] == NULL)
+    no_mem_exit("get_mem2Dodouble: array2D");
+
+  // Shift the first row; the following rows inherit the shift
+  rows[0] += offset;
+
+  for (r = 1; r < dim0; r++)
+    rows[r] = rows[0] + r * dim1;
+
+  return dim0 * (sizeof(double*) + dim1 * sizeof(double));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory double array -> double array3D[dim0][dim1][dim2]
+ *    The second dimension is shifted by offset, i.e. it is indexed from
+ *    -offset to dim1 - offset - 1. Each [i][j] vector of dim2 doubles is
+ *    a separate zero-initialised calloc() block.
+ *    Release with free_mem3Dodouble() using the same dim0, dim1, offset.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dodouble(double ****array3D, int dim0, int dim1, int dim2, int offset)
+{
+  double ***planes;
+  int p, q;
+
+  *array3D = planes = (double***)malloc(dim0 * sizeof(double**));
+  if (planes == NULL)
+    no_mem_exit("get_mem3Dodouble: array3D");
+
+  /* one table holding the dim0 * dim1 second-level pointers */
+  planes[0] = (double**)calloc(dim0 * dim1, sizeof(double*));
+  if (planes[0] == NULL)
+    no_mem_exit("get_mem3Dodouble: array3D");
+
+  planes[0] += offset;
+
+  for (p = 1; p < dim0; ++p)
+    planes[p] = planes[p - 1] + dim1;
+
+  for (p = 0; p < dim0; ++p)
+  {
+    for (q = -offset; q < dim1 - offset; ++q)
+    {
+      planes[p][q] = (double*)calloc(dim2, sizeof(double));
+      if (planes[p][q] == NULL)
+        no_mem_exit("get_mem3Dodouble: array3D");
+    }
+  }
+
+  return dim0*( sizeof(double**) + dim1 * ( sizeof(double*) + dim2 * sizeof(double)));
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> short array2D[dim0][dim1]
+ *    Row 0 points offset_y rows and offset_x columns into the underlying
+ *    block, so rows and columns may be addressed with negative indices.
+ *    Release with free_offset_mem2Dshort() using the same dim1 and offsets.
+ *
+ * \param array2D   receives the row pointer table
+ * \param dim0      number of rows
+ * \param dim1      number of columns
+ * \param offset_y  number of rows located above row 0
+ * \param offset_x  number of columns located left of column 0
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_offset_mem2Dshort(short ***array2D, int dim0, int dim1, int offset_y, int offset_x)
+{
+  int i;
+
+  if((*array2D = (short**)malloc(dim0 * sizeof(short*))) == NULL)
+    no_mem_exit("get_offset_mem2Dshort: array2D");
+
+  if(((*array2D)[0] = (short* )calloc(dim0 * dim1, sizeof(short))) == NULL)
+    no_mem_exit("get_offset_mem2Dshort: array2D");
+
+  /* row 0 refers to element (offset_y, offset_x) of the underlying block */
+  (*array2D)[0] += offset_x + offset_y * dim1;
+
+  /* NOTE(review): the row pointer table itself is not shifted, so for
+   * offset_y > 0 these stores land in front of the malloc()ed table.
+   * Shifting the table would also require a matching change in
+   * free_offset_mem2Dshort(), hence it is only flagged here. */
+  for(i=-1 ; i > -offset_y - 1; i--)
+  {
+    (*array2D)[i] = (*array2D)[i+1] - dim1;
+  }
+
+  /* Rows below row 0: dim0 - offset_y rows remain in total. The bound is
+   * the row count (dim0); the column count (dim1) used previously
+   * over- or under-ran the table whenever dim0 != dim1. */
+  for(i=1 ; i < dim0 - offset_y; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1;
+
+  return dim0 * (sizeof(short*) + dim1 * sizeof(short));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory int array -> int array3D[dim0][dim1][dim2]
+ *    The second dimension is shifted by offset, i.e. it is indexed from
+ *    -offset to dim1 - offset - 1. Each [i][j] vector of dim2 ints is a
+ *    separate zero-initialised calloc() block.
+ *    Release with free_mem3Doint() using the same dim0, dim1, offset.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Doint(int ****array3D, int dim0, int dim1, int dim2, int offset)
+{
+  int ***planes;
+  int p, q;
+
+  *array3D = planes = (int***)malloc(dim0 * sizeof(int**));
+  if (planes == NULL)
+    no_mem_exit("get_mem3Doint: array3D");
+
+  /* one table holding the dim0 * dim1 second-level pointers */
+  planes[0] = (int**)calloc(dim0 * dim1, sizeof(int*));
+  if (planes[0] == NULL)
+    no_mem_exit("get_mem3Doint: array3D");
+
+  planes[0] += offset;
+
+  for (p = 1; p < dim0; ++p)
+    planes[p] = planes[p - 1] + dim1;
+
+  for (p = 0; p < dim0; ++p)
+  {
+    for (q = -offset; q < dim1 - offset; ++q)
+    {
+      planes[p][q] = (int*)calloc(dim2, sizeof(int));
+      if (planes[p][q] == NULL)
+        no_mem_exit("get_mem3Doint: array3D");
+    }
+  }
+
+  return dim0 * (sizeof(int**) + dim1 * (sizeof(int*) + dim2 * sizeof(int)));
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> int array2D[dim0][dim1]
+ *    Every row pointer is advanced by offset elements, so column
+ *    indices start at -offset.
+ *    Release with free_mem2Doint() using the same offset.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Doint(int ***array2D, int dim0, int dim1, int offset)
+{
+  int i;
+
+  /* The failure messages name this function; they previously said
+   * "get_mem2Dint", which is a different allocator. */
+  if((*array2D = (int**)malloc(dim0 * sizeof(int*))) == NULL)
+    no_mem_exit("get_mem2Doint: array2D");
+  if(((*array2D)[0] = (int* )calloc(dim0 * dim1, sizeof(int))) == NULL)
+    no_mem_exit("get_mem2Doint: array2D");
+
+  /* shift row 0; the remaining rows inherit the shift below */
+  (*array2D)[0] += offset;
+
+  for(i=1 ; i<dim0 ; i++)
+    (*array2D)[i] = (*array2D)[i-1] + dim1 ;
+
+  return dim0 * (sizeof(int*) + dim1 * sizeof(int));
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> double array3D[dim0][dim1][dim2]
+ *    The top-level table of dim0 pointers is malloc()ed; all rows come
+ *    from one get_mem2Ddouble() array of (dim0 * dim1) x dim2 elements,
+ *    and plane j points at row j * dim1 of it.
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+// same change as in get_mem3Dint
+int get_mem3Ddouble(double ****array3D, int dim0, int dim1, int dim2)
+{
+  double **rows;
+  int plane;
+  int total = dim0 * sizeof(double**);
+
+  *array3D = (double***)malloc(dim0 * sizeof(double**));
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3Ddouble: array3D");
+
+  total += get_mem2Ddouble(&rows, dim0 * dim1, dim2);
+
+  for (plane = 0; plane < dim0; ++plane)
+    (*array3D)[plane] = rows + plane * dim1;
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 2D double memory array
+ *    which was allocated with get_mem2Ddouble()
+ *    error() is raised (code 100) when the handle or its first row
+ *    pointer is NULL.
+ ************************************************************************
+ */
+void free_mem2Ddouble(double **array2D)
+{
+  if (!array2D)
+  {
+    error ("free_mem2Ddouble: trying to free unused memory",100);
+    return;
+  }
+
+  if (array2D[0])
+    free (array2D[0]);
+  else
+    error ("free_mem2Ddouble: trying to free unused memory",100);
+
+  free (array2D);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    free 2D double memory array (with offset)
+*    which was allocated with get_mem2Dodouble()
+*    offset must be the value passed at allocation; it is subtracted from
+*    the first row pointer to recover the pointer handed to free().
+************************************************************************
+*/
+void free_mem2Dodouble(double **array2D, int offset)
+{
+  if (!array2D)
+  {
+    error ("free_mem2Dodouble: trying to free unused memory",100);
+    return;
+  }
+
+  /* undo the shift first: the NULL test applies to the unshifted pointer */
+  array2D[0] -= offset;
+
+  if (array2D[0])
+    free (array2D[0]);
+  else
+    error ("free_mem2Dodouble: trying to free unused memory",100);
+
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D double memory array with offset
+ *    dim0, dim1 and offset must match the values used at allocation:
+ *    every [i][j] vector with j in [-offset, dim1 - offset) is freed,
+ *    then the unshifted second-level table, then the top-level table.
+ ************************************************************************
+ */
+void free_mem3Dodouble(double ***array3D, int dim0, int dim1, int offset)
+{
+  int p, q;
+
+  if (!array3D)
+  {
+    error ("free_mem3Dodouble: trying to free unused memory",100);
+    return;
+  }
+
+  for (p = 0; p < dim0; ++p)
+  {
+    for (q = -offset; q < dim1 - offset; ++q)
+    {
+      if (array3D[p][q])
+        free(array3D[p][q]);
+      else
+        error ("free_mem3Dodouble: trying to free unused memory",100);
+    }
+  }
+
+  /* undo the shift before releasing the second-level table */
+  array3D[0] -= offset;
+
+  if (array3D[0])
+    free(array3D[0]);
+  else
+    error ("free_mem3Dodouble: trying to free unused memory",100);
+
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D int memory array with offset
+ *    dim0, dim1 and offset must match the values used at allocation:
+ *    every [i][j] vector with j in [-offset, dim1 - offset) is freed,
+ *    then the unshifted second-level table, then the top-level table.
+ ************************************************************************
+ */
+void free_mem3Doint(int ***array3D, int dim0, int dim1, int offset)
+{
+  int p, q;
+
+  if (!array3D)
+  {
+    error ("free_mem3Doint: trying to free unused memory",100);
+    return;
+  }
+
+  for (p = 0; p < dim0; ++p)
+  {
+    for (q = -offset; q < dim1 - offset; ++q)
+    {
+      if (array3D[p][q])
+        free(array3D[p][q]);
+      else
+        error ("free_mem3Doint: trying to free unused memory",100);
+    }
+  }
+
+  /* undo the shift before releasing the second-level table */
+  array3D[0] -= offset;
+
+  if (array3D[0])
+    free(array3D[0]);
+  else
+    error ("free_mem3Doint: trying to free unused memory",100);
+
+  free (array3D);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    free 2D int memory array (with offset)
+*    which was allocated with get_mem2Doint()
+*    offset must be the value passed at allocation; it is subtracted from
+*    the first row pointer to recover the pointer handed to free().
+************************************************************************
+*/
+void free_mem2Doint(int **array2D, int offset)
+{
+  if (!array2D)
+  {
+    error ("free_mem2Doint: trying to free unused memory",100);
+    return;
+  }
+
+  /* undo the shift first: the NULL test applies to the unshifted pointer */
+  array2D[0] -= offset;
+
+  if (array2D[0])
+    free (array2D[0]);
+  else
+    error ("free_mem2Doint: trying to free unused memory",100);
+
+  free (array2D);
+}
+
+/*!
+************************************************************************
+* \brief
+*    free 2D short memory array (with row and column offset)
+*    which was allocated with get_offset_mem2Dshort()
+*    dim1, offset_y and offset_x must match the allocation; the first row
+*    pointer is moved back by offset_x + offset_y * dim1 before free().
+************************************************************************
+*/
+void free_offset_mem2Dshort(short **array2D, int dim1, int offset_y, int offset_x)
+{
+  if (!array2D)
+  {
+    error ("free_offset_mem2Dshort: trying to free unused memory",100);
+    return;
+  }
+
+  /* undo the shift first: the NULL test applies to the unshifted pointer */
+  array2D[0] -= offset_x + offset_y * dim1;
+
+  if (array2D[0])
+    free (array2D[0]);
+  else
+    error ("free_offset_mem2Dshort: trying to free unused memory",100);
+
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D double memory array
+ *    which was allocated with get_mem3Ddouble()
+ *    The first plane pointer is released through free_mem2Ddouble(),
+ *    then the top-level table is freed. A NULL handle raises error().
+ ************************************************************************
+ */
+void free_mem3Ddouble(double ***array3D)
+{
+  if (!array3D)
+  {
+    error ("free_mem3D: trying to free unused memory",100);
+    return;
+  }
+
+  free_mem2Ddouble(array3D[0]);
+  free (array3D);
+}
+
+
+#endif
diff --git a/Src/h264dec/lcommon/src/memcache.c b/Src/h264dec/lcommon/src/memcache.c
new file mode 100644
index 00000000..ce3b29d1
--- /dev/null
+++ b/Src/h264dec/lcommon/src/memcache.c
@@ -0,0 +1,106 @@
+#include "memcache.h"
+#include "mbuffer.h"
+#include "memalloc.h"
+
+/* Release every cached image with free_memImage() and reset the cached
+ * dimensions to 0 x 0. */
+void image_cache_flush(ImageCache *cache)
+{
+  VideoImage *node, *following;
+
+  for (node = cache->head; node; node = following)
+  {
+    following = node->next;   /* read the link before the node is freed */
+    free_memImage(node);
+    cache->head = following;
+  }
+
+  cache->size_x = 0;
+  cache->size_y = 0;
+}
+
+/* Switch the cache to width x height. When the size actually changes,
+ * all cached images are flushed first. */
+void image_cache_set_dimensions(ImageCache *cache, int width, int height)
+{
+  if (width == cache->size_x && height == cache->size_y)
+    return;   /* same size: keep the cached images */
+
+  image_cache_flush(cache);
+  cache->size_x = width;
+  cache->size_y = height;
+}
+
+/* Returns 1 when the cache is set up for width x height, 0 otherwise. */
+int image_cache_dimensions_match(ImageCache *cache, int width, int height)
+{
+  return (width == cache->size_x && height == cache->size_y);
+}
+
+/* Push image onto the front of the cache's singly linked list.
+ * The image's dimensions are not checked here. */
+void image_cache_add(ImageCache *cache, VideoImage *image)
+{
+  VideoImage *previous_head = cache->head;
+
+  image->next = previous_head;
+  cache->head = image;
+}
+
+/* Pop the most recently added image from the cache.
+ * Returns 0 when the cache is empty; the returned image's next link is
+ * cleared. */
+struct video_image *image_cache_get(ImageCache *cache)
+{
+  VideoImage *popped = cache->head;
+
+  if (!popped)
+    return 0;
+
+  cache->head = popped->next;
+  popped->next = 0;
+  return popped;
+}
+
+/* -------------
+
+PicMotion arrays are allocated with one extra slot in the first dimension
+(index size_y), which the motion cache uses as the "next" pointer of its
+singly linked list.
+------------- */
+
+
+/* Release every cached PicMotion array with free_mem2DPicMotion() and
+ * reset the cached dimensions to 0 x 0. The link to the next entry is
+ * read from slot [size_y] of each array. */
+void motion_cache_flush(MotionCache *cache)
+{
+  PicMotion **entry, **following;
+
+  for (entry = cache->head; entry; entry = following)
+  {
+    following = (PicMotion **)entry[cache->size_y];  /* read link before freeing */
+    free_mem2DPicMotion(entry);
+    cache->head = following;
+  }
+
+  cache->size_x = 0;
+  cache->size_y = 0;
+}
+
+/* Switch the cache to width x height. When the size actually changes,
+ * all cached arrays are flushed first (while the old size_y is still
+ * in place for the link lookup). */
+void motion_cache_set_dimensions(MotionCache *cache, int width, int height)
+{
+  if (width == cache->size_x && height == cache->size_y)
+    return;   /* same size: keep the cached arrays */
+
+  motion_cache_flush(cache);
+  cache->size_x = width;
+  cache->size_y = height;
+}
+
+/* Returns 1 when the cache is set up for width x height, 0 otherwise. */
+int motion_cache_dimensions_match(MotionCache *cache, int width, int height)
+{
+  return (width == cache->size_x && height == cache->size_y);
+}
+
+/* Push a PicMotion array onto the front of the cache. The previous head
+ * is stored in the array's slot [size_y], which serves as the list link. */
+void motion_cache_add(MotionCache *cache, PicMotion **image)
+{
+  PicMotion **previous_head = cache->head;
+
+  image[cache->size_y] = (PicMotion *)previous_head;
+  cache->head = image;
+}
+
+/* Pop the most recently added PicMotion array from the cache.
+ * Returns 0 when the cache is empty; the link slot [size_y] of the
+ * returned array is cleared. */
+struct pic_motion **motion_cache_get(MotionCache *cache)
+{
+  PicMotion **popped = cache->head;
+
+  if (!popped)
+    return 0;
+
+  cache->head = (PicMotion **)popped[cache->size_y];
+  popped[cache->size_y] = 0;
+  return popped;
+}
diff --git a/Src/h264dec/lcommon/src/mv_prediction.c b/Src/h264dec/lcommon/src/mv_prediction.c
new file mode 100644
index 00000000..b4638d6d
--- /dev/null
+++ b/Src/h264dec/lcommon/src/mv_prediction.c
@@ -0,0 +1,250 @@
+/*!
+ *************************************************************************************
+ * \file mv_prediction.c
+ *
+ * \brief
+ * Motion Vector Prediction Functions
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Alexis Michael Tourapis <alexismt@ieee.org>
+ * - Karsten Sühring <suehring@hhi.de>
+ *************************************************************************************
+ */
+
+#include "global.h"
+#include "mbuffer.h"
+/*!
+ ************************************************************************
+ * \brief
+ * Get motion vector predictor (variant installed by
+ * init_motion_vector_prediction() when mb_aff_frame_flag is set)
+ *
+ * \param currMB       current macroblock; mb_field and p_Vid are read
+ * \param block        neighbors: [0] left, [1] up, [2] upper right
+ * \param pmv          output, both components of the predictor
+ * \param ref_frame    reference index the predictor is requested for
+ * \param motion       motion data, addressed as motion[pos_y][pos_x]
+ * \param mb_x         only compared against 0 (8x16 rule)
+ * \param mb_y         only compared against 0 (16x8 rule)
+ * \param blockshape_x partition width;  8x16 / 16x8 enable the
+ * \param blockshape_y partition height; directional rules
+ *
+ * Unavailable neighbors count as reference -1 with a zero vector.
+ * When a neighbor's mb_field differs from currMB->mb_field, its ref_idx
+ * is doubled or halved and component 1 of its vector is halved or
+ * doubled before it takes part in the prediction.
+ ************************************************************************
+ */
+static void GetMotionVectorPredictorMBAFF (Macroblock *currMB,
+ PixelPos *block, // <--> block neighbors: [0] left, [1] up, [2] upper right
+ short pmv[2],
+ short ref_frame,
+ PicMotion **motion,
+ int mb_x,
+ int mb_y,
+ int blockshape_x,
+ int blockshape_y)
+{
+ int mv_a, mv_b, mv_c, pred_vec=0;
+ int mvPredType, rFrameL, rFrameU, rFrameUR;
+ int hv;
+ VideoParameters *p_Vid = currMB->p_Vid;
+
+ mvPredType = MVPRED_MEDIAN; // default unless a rule below selects a single neighbor
+
+
+ if (currMB->mb_field) // current MB is field: ref_idx of frame neighbors is doubled
+ {
+ rFrameL = block[0].available
+ ? (p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].ref_idx
+ : motion[block[0].pos_y][block[0].pos_x].ref_idx * 2) : -1;
+ rFrameU = block[1].available
+ ? (p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].ref_idx
+ : motion[block[1].pos_y][block[1].pos_x].ref_idx * 2) : -1;
+ rFrameUR = block[2].available
+ ? (p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].ref_idx
+ : motion[block[2].pos_y][block[2].pos_x].ref_idx * 2) : -1;
+ }
+ else // current MB is frame: ref_idx of field neighbors is halved
+ {
+ rFrameL = block[0].available
+ ? (p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].ref_idx >>1
+ : motion[block[0].pos_y][block[0].pos_x].ref_idx) : -1;
+ rFrameU = block[1].available
+ ? (p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].ref_idx >>1
+ : motion[block[1].pos_y][block[1].pos_x].ref_idx) : -1;
+ rFrameUR = block[2].available
+ ? (p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].ref_idx >>1
+ : motion[block[2].pos_y][block[2].pos_x].ref_idx) : -1;
+ }
+
+
+ /* Prediction if only one of the neighbors uses the reference frame
+ * we are checking: that neighbor's vector is taken as is
+ */
+ if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame)
+ mvPredType = MVPRED_L;
+ else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame)
+ mvPredType = MVPRED_U;
+ else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame)
+ mvPredType = MVPRED_UR;
+ // Directional predictions for 8x16 and 16x8 partitions; they override the choice made above
+ if(blockshape_x == 8 && blockshape_y == 16)
+ {
+ if(mb_x == 0)
+ {
+ if(rFrameL == ref_frame)
+ mvPredType = MVPRED_L;
+ }
+ else
+ {
+ if( rFrameUR == ref_frame)
+ mvPredType = MVPRED_UR;
+ }
+ }
+ else if(blockshape_x == 16 && blockshape_y == 8)
+ {
+ if(mb_y == 0)
+ {
+ if(rFrameU == ref_frame)
+ mvPredType = MVPRED_U;
+ }
+ else
+ {
+ if(rFrameL == ref_frame)
+ mvPredType = MVPRED_L;
+ }
+ }
+
+ for (hv=0; hv < 2; hv++) // one pass per vector component; pmv[hv] is written at the end of each pass
+ {
+ if (hv == 0) // component 0 is taken from the neighbors unscaled
+ {
+ mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[hv] : 0;
+ mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[hv] : 0;
+ mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[hv] : 0;
+ }
+ else // component 1 (presumably the vertical one) is rescaled between frame and field neighbors
+ {
+ if (currMB->mb_field)
+ {
+ mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].mv[hv]
+ : motion[block[0].pos_y][block[0].pos_x].mv[hv] / 2
+ : 0;
+ mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].mv[hv]
+ : motion[block[1].pos_y][block[1].pos_x].mv[hv] / 2
+ : 0;
+ mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].mv[hv]
+ : motion[block[2].pos_y][block[2].pos_x].mv[hv] / 2
+ : 0;
+ }
+ else
+ {
+ mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field
+ ? motion[block[0].pos_y][block[0].pos_x].mv[hv] * 2
+ : motion[block[0].pos_y][block[0].pos_x].mv[hv]
+ : 0;
+ mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field
+ ? motion[block[1].pos_y][block[1].pos_x].mv[hv] * 2
+ : motion[block[1].pos_y][block[1].pos_x].mv[hv]
+ : 0;
+ mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field
+ ? motion[block[2].pos_y][block[2].pos_x].mv[hv] * 2
+ : motion[block[2].pos_y][block[2].pos_x].mv[hv]
+ : 0;
+ }
+ }
+
+ switch (mvPredType)
+ {
+ case MVPRED_MEDIAN:
+ if(!(block[1].available || block[2].available)) // neither up nor upper right available: use left
+ {
+ pred_vec = mv_a;
+ }
+ else
+ {
+ pred_vec = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c)); // sum minus min minus max = median of three
+ }
+ break;
+ case MVPRED_L:
+ pred_vec = mv_a;
+ break;
+ case MVPRED_U:
+ pred_vec = mv_b;
+ break;
+ case MVPRED_UR:
+ pred_vec = mv_c;
+ break;
+ default:
+ break;
+ }
+
+ pmv[hv] = (short) pred_vec;
+ }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Get motion vector predictor (variant installed by
+ *    init_motion_vector_prediction() when mb_aff_frame_flag is not set)
+ *
+ *    block[0], block[1], block[2] describe the left, upper and upper-right
+ *    neighbours. An unavailable neighbour counts as reference -1 and
+ *    contributes a zero vector. A single neighbour's vector is copied
+ *    when one of the selection rules applies, otherwise the
+ *    component-wise median of the three vectors is written to pmv.
+ ************************************************************************
+ */
+// TODO: benski> make SSE3/MMX version
+static void GetMotionVectorPredictorNormal (Macroblock *currMB,
+                                            PixelPos *block, // <--> block neighbors
+                                            short pmv[2],
+                                            short ref_frame,
+                                            PicMotion **motion,
+                                            int mb_x,
+                                            int mb_y,
+                                            int blockshape_x,
+                                            int blockshape_y)
+{
+  /* Motion data of each neighbour; a null pointer marks an unavailable one. */
+  const PicMotion *left    = block[0].available ? &motion[block[0].pos_y][block[0].pos_x] : 0;
+  const PicMotion *up      = block[1].available ? &motion[block[1].pos_y][block[1].pos_x] : 0;
+  const PicMotion *upright = block[2].available ? &motion[block[2].pos_y][block[2].pos_x] : 0;
+
+  const int refL  = left    ? left->ref_idx    : -1;
+  const int refU  = up      ? up->ref_idx      : -1;
+  const int refUR = upright ? upright->ref_idx : -1;
+
+  const int is_8x16 = (blockshape_x == 8 && blockshape_y == 16);
+  const int is_16x8 = (blockshape_x == 16 && blockshape_y == 8);
+
+  const PicMotion *single = 0;  /* neighbour whose vector is copied          */
+  int use_median = 0;           /* set when no single neighbour is selected  */
+  int comp;
+
+  /* Prediction if only one of the neighbors uses the reference frame
+   * we are checking, or if a directional rule selects that neighbour
+   */
+  if (refL == ref_frame &&
+      ((refU != ref_frame && refUR != ref_frame) || (is_8x16 && mb_x == 0) || (is_16x8 && mb_y != 0)))
+  {
+    single = left;
+  }
+  else if (refU == ref_frame &&
+           ((refL != ref_frame && refUR != ref_frame) || (is_16x8 && mb_y == 0)))
+  {
+    single = up;
+  }
+  else if (refUR == ref_frame &&
+           ((refL != ref_frame && refU != ref_frame) || (is_8x16 && mb_x != 0)))
+  {
+    single = upright;
+  }
+  else if (!(up || upright))
+  {
+    /* median requested, but neither up nor upper right exists: use left */
+    single = left;
+  }
+  else
+  {
+    use_median = 1;
+  }
+
+  if (!use_median)
+  {
+    /* a selected but unavailable neighbour yields the zero vector */
+    pmv[0] = single ? single->mv[0] : 0;
+    pmv[1] = single ? single->mv[1] : 0;
+    return;
+  }
+
+  for (comp = 0; comp < 2; ++comp)
+  {
+    const int mv_a = left    ? left->mv[comp]    : 0;
+    const int mv_b = up      ? up->mv[comp]      : 0;
+    const int mv_c = upright ? upright->mv[comp] : 0;
+
+    /* sum minus minimum minus maximum leaves the median of the three */
+    pmv[comp] = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c));
+  }
+}
+
+/* Install the motion vector predictor for currMB: the MBAFF variant when
+ * mb_aff_frame_flag is non-zero, the normal variant otherwise. */
+void init_motion_vector_prediction(Macroblock *currMB, int mb_aff_frame_flag)
+{
+  currMB->GetMVPredictor = mb_aff_frame_flag ? GetMotionVectorPredictorMBAFF
+                                             : GetMotionVectorPredictorNormal;
+}
diff --git a/Src/h264dec/lcommon/src/parsetcommon.c b/Src/h264dec/lcommon/src/parsetcommon.c
new file mode 100644
index 00000000..fe3f0e9a
--- /dev/null
+++ b/Src/h264dec/lcommon/src/parsetcommon.c
@@ -0,0 +1,244 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ * parsetcommon.c
+ * \brief
+ * Picture and Sequence Parameter set generation and handling
+ * \date 25 November 2002
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Stephan Wenger <stewe@cs.tu-berlin.de>
+ *
+ **************************************************************************************
+ */
+
+#include "global.h"
+#include "parsetcommon.h"
+#include "memalloc.h"
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates zero-initialised memory for a picture parameter set
+ *    (no_mem_exit() is called when the allocation fails)
+ *
+ * \return
+ *    pointer to a pps; release it with FreePPS()
+ *************************************************************************************
+ */
+
+pic_parameter_set_rbsp_t *AllocPPS ()
+{
+  pic_parameter_set_rbsp_t *pps = calloc (sizeof (pic_parameter_set_rbsp_t), 1);
+
+  if (pps == NULL)
+    no_mem_exit ("AllocPPS: PPS");
+
+  pps->slice_group_id = NULL;
+  return pps;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates zero-initialised memory for a sequence parameter set
+ *    (no_mem_exit() is called when the allocation fails)
+ *
+ * \return
+ *    pointer to a sps; release it with FreeSPS()
+ *************************************************************************************
+ */
+
+seq_parameter_set_rbsp_t *AllocSPS ()
+{
+  seq_parameter_set_rbsp_t *sps = calloc (sizeof (seq_parameter_set_rbsp_t), 1);
+
+  if (sps == NULL)
+    no_mem_exit ("AllocSPS: SPS");
+
+  return sps;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Frees a picture parameter set together with its slice_group_id array
+ *
+ * \param pps
+ *    Picture parameter set to be freed; must not be NULL (asserted)
+ *************************************************************************************
+ */
+
+void FreePPS (pic_parameter_set_rbsp_t *pps)
+{
+  assert (pps != NULL);
+
+  /* free() accepts NULL, so an absent slice_group_id needs no guard */
+  free (pps->slice_group_id);
+  free (pps);
+}
+
+
+ /*!
+ *************************************************************************************
+ * \brief
+ * Frees a sequence parameter set. Only the structure itself is
+ * released with free(); no member is freed separately.
+ *
+ * \param sps
+ * Sequence parameter set to be freed; must not be NULL (asserted)
+ *************************************************************************************
+ */
+
+ void FreeSPS (seq_parameter_set_rbsp_t *sps)
+ {
+ assert (sps != NULL); // a NULL set is treated as a caller error
+ free (sps);
+ }
+
+/*!
+ *************************************************************************************
+ * \brief
+ * Compares two sequence parameter sets field by field
+ *
+ * Conditional fields (picture order count parameters, the MBAFF flag,
+ * the cropping rectangle) are compared only when the controlling field
+ * of sps1 enables them; that controlling field has itself already been
+ * compared. Only the fields listed in the body take part.
+ *
+ * \return
+ * 1 if all compared fields match, 0 otherwise, and always 0 if either
+ * set is not marked Valid
+ *************************************************************************************
+ */
+int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2)
+{
+ unsigned i;
+ int equal = 1;
+
+ if ((!sps1->Valid) || (!sps2->Valid)) // an invalid set never compares equal
+ return 0;
+
+ equal &= (sps1->profile_idc == sps2->profile_idc);
+ equal &= (sps1->constrained_set0_flag == sps2->constrained_set0_flag);
+ equal &= (sps1->constrained_set1_flag == sps2->constrained_set1_flag);
+ equal &= (sps1->constrained_set2_flag == sps2->constrained_set2_flag);
+ equal &= (sps1->level_idc == sps2->level_idc);
+ equal &= (sps1->seq_parameter_set_id == sps2->seq_parameter_set_id);
+ equal &= (sps1->log2_max_frame_num_minus4 == sps2->log2_max_frame_num_minus4);
+ equal &= (sps1->pic_order_cnt_type == sps2->pic_order_cnt_type);
+
+ if (!equal) return equal; // pic_order_cnt_type must agree before it selects the fields below
+
+ if( sps1->pic_order_cnt_type == 0 )
+ {
+ equal &= (sps1->log2_max_pic_order_cnt_lsb_minus4 == sps2->log2_max_pic_order_cnt_lsb_minus4);
+ }
+
+ else if( sps1->pic_order_cnt_type == 1 )
+ {
+ equal &= (sps1->delta_pic_order_always_zero_flag == sps2->delta_pic_order_always_zero_flag);
+ equal &= (sps1->offset_for_non_ref_pic == sps2->offset_for_non_ref_pic);
+ equal &= (sps1->offset_for_top_to_bottom_field == sps2->offset_for_top_to_bottom_field);
+ equal &= (sps1->num_ref_frames_in_pic_order_cnt_cycle == sps2->num_ref_frames_in_pic_order_cnt_cycle);
+ if (!equal) return equal; // the cycle length must agree before it bounds the loop
+
+ for ( i = 0 ; i< sps1->num_ref_frames_in_pic_order_cnt_cycle ;i ++)
+ equal &= (sps1->offset_for_ref_frame[i] == sps2->offset_for_ref_frame[i]);
+ }
+
+ equal &= (sps1->num_ref_frames == sps2->num_ref_frames);
+ equal &= (sps1->gaps_in_frame_num_value_allowed_flag == sps2->gaps_in_frame_num_value_allowed_flag);
+ equal &= (sps1->pic_width_in_mbs_minus1 == sps2->pic_width_in_mbs_minus1);
+ equal &= (sps1->pic_height_in_map_units_minus1 == sps2->pic_height_in_map_units_minus1);
+ equal &= (sps1->frame_mbs_only_flag == sps2->frame_mbs_only_flag);
+
+ if (!equal) return equal;
+ if( !sps1->frame_mbs_only_flag )
+ equal &= (sps1->mb_adaptive_frame_field_flag == sps2->mb_adaptive_frame_field_flag);
+
+ equal &= (sps1->direct_8x8_inference_flag == sps2->direct_8x8_inference_flag);
+ equal &= (sps1->frame_cropping_flag == sps2->frame_cropping_flag);
+ if (!equal) return equal;
+ if (sps1->frame_cropping_flag)
+ {
+ equal &= (sps1->frame_cropping_rect_left_offset == sps2->frame_cropping_rect_left_offset);
+ equal &= (sps1->frame_cropping_rect_right_offset == sps2->frame_cropping_rect_right_offset);
+ equal &= (sps1->frame_cropping_rect_top_offset == sps2->frame_cropping_rect_top_offset);
+ equal &= (sps1->frame_cropping_rect_bottom_offset == sps2->frame_cropping_rect_bottom_offset);
+ }
+ equal &= (sps1->vui_parameters_present_flag == sps2->vui_parameters_present_flag); // only the presence flag is compared, not the VUI contents
+
+ return equal;
+}
+/*!
+ *************************************************************************************
+ * \brief
+ * Compares two picture parameter sets field by field
+ *
+ * Slice group parameters are compared according to
+ * pps1->slice_group_map_type, and scaling lists only where pps1 marks
+ * them as present. Only the fields listed in the body take part.
+ *
+ * \return
+ * 1 if all compared fields match, 0 otherwise, and always 0 if either
+ * set is not marked Valid
+ *************************************************************************************
+ */
+int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2)
+{
+ unsigned i, j;
+ int equal = 1;
+
+ if ((!pps1->Valid) || (!pps2->Valid)) // an invalid set never compares equal
+ return 0;
+
+ equal &= (pps1->pic_parameter_set_id == pps2->pic_parameter_set_id);
+ equal &= (pps1->seq_parameter_set_id == pps2->seq_parameter_set_id);
+ equal &= (pps1->entropy_coding_mode_flag == pps2->entropy_coding_mode_flag);
+ equal &= (pps1->bottom_field_pic_order_in_frame_present_flag == pps2->bottom_field_pic_order_in_frame_present_flag);
+ equal &= (pps1->num_slice_groups_minus1 == pps2->num_slice_groups_minus1);
+
+ if (!equal) return equal; // the slice group count must agree before it bounds the loops below
+
+ if (pps1->num_slice_groups_minus1>0)
+ {
+ equal &= (pps1->slice_group_map_type == pps2->slice_group_map_type);
+ if (!equal) return equal;
+ if (pps1->slice_group_map_type == 0)
+ {
+ for (i=0; i<=pps1->num_slice_groups_minus1; i++)
+ equal &= (pps1->run_length_minus1[i] == pps2->run_length_minus1[i]);
+ }
+ else if( pps1->slice_group_map_type == 2 )
+ {
+ for (i=0; i<pps1->num_slice_groups_minus1; i++) // strict '<': one rectangle fewer than there are slice groups
+ {
+ equal &= (pps1->top_left[i] == pps2->top_left[i]);
+ equal &= (pps1->bottom_right[i] == pps2->bottom_right[i]);
+ }
+ }
+ else if( pps1->slice_group_map_type == 3 || pps1->slice_group_map_type==4 || pps1->slice_group_map_type==5 )
+ {
+ equal &= (pps1->slice_group_change_direction_flag == pps2->slice_group_change_direction_flag);
+ equal &= (pps1->slice_group_change_rate_minus1 == pps2->slice_group_change_rate_minus1);
+ }
+ else if( pps1->slice_group_map_type == 6 )
+ {
+ equal &= (pps1->pic_size_in_map_units_minus1 == pps2->pic_size_in_map_units_minus1);
+ if (!equal) return equal;
+ for (i=0; i<=pps1->pic_size_in_map_units_minus1; i++) // NOTE(review): slice_group_id is read without a NULL check; confirm both sets have it allocated for map type 6
+ equal &= (pps1->slice_group_id[i] == pps2->slice_group_id[i]);
+ }
+ }
+
+ equal &= (pps1->num_ref_idx_l0_active_minus1 == pps2->num_ref_idx_l0_active_minus1);
+ equal &= (pps1->num_ref_idx_l1_active_minus1 == pps2->num_ref_idx_l1_active_minus1);
+ equal &= (pps1->weighted_pred_flag == pps2->weighted_pred_flag);
+ equal &= (pps1->weighted_bipred_idc == pps2->weighted_bipred_idc);
+ equal &= (pps1->pic_init_qp_minus26 == pps2->pic_init_qp_minus26);
+ equal &= (pps1->pic_init_qs_minus26 == pps2->pic_init_qs_minus26);
+ equal &= (pps1->chroma_qp_index_offset == pps2->chroma_qp_index_offset);
+ equal &= (pps1->deblocking_filter_control_present_flag == pps2->deblocking_filter_control_present_flag);
+ equal &= (pps1->constrained_intra_pred_flag == pps2->constrained_intra_pred_flag);
+ equal &= (pps1->redundant_pic_cnt_present_flag == pps2->redundant_pic_cnt_present_flag);
+
+ if (!equal) return equal;
+
+ //Fidelity Range Extensions Stuff
+ //It is initialized to zero, so should be ok to check all the time.
+ equal &= (pps1->transform_8x8_mode_flag == pps2->transform_8x8_mode_flag);
+ equal &= (pps1->pic_scaling_matrix_present_flag == pps2->pic_scaling_matrix_present_flag);
+ if(pps1->pic_scaling_matrix_present_flag)
+ {
+ for(i = 0; i < (6 + ((unsigned)pps1->transform_8x8_mode_flag << 1)); i++) // 6 lists, plus 2 more when transform_8x8_mode_flag is 1
+ {
+ equal &= (pps1->pic_scaling_list_present_flag[i] == pps2->pic_scaling_list_present_flag[i]);
+ if(pps1->pic_scaling_list_present_flag[i])
+ {
+ if(i < 6) // the first six lists are 4x4 (16 entries), the remaining ones 8x8 (64 entries)
+ {
+ for (j = 0; j < 16; j++)
+ equal &= (pps1->ScalingList4x4[i][j] == pps2->ScalingList4x4[i][j]);
+ }
+ else
+ {
+ for (j = 0; j < 64; j++)
+ equal &= (pps1->ScalingList8x8[i-6][j] == pps2->ScalingList8x8[i-6][j]);
+ }
+ }
+ }
+ }
+ equal &= (pps1->second_chroma_qp_index_offset == pps2->second_chroma_qp_index_offset);
+
+ return equal;
+}
diff --git a/Src/h264dec/lcommon/src/transform.c b/Src/h264dec/lcommon/src/transform.c
new file mode 100644
index 00000000..617ca7c1
--- /dev/null
+++ b/Src/h264dec/lcommon/src/transform.c
@@ -0,0 +1,809 @@
+/*!
+***************************************************************************
+* \file transform.c
+*
+* \brief
+* Transform functions
+*
+* \author
+* Main contributors (see contributors.h for copyright, address and affiliation details)
+* - Alexis Michael Tourapis
+* \date
+* 01. July 2007
+**************************************************************************
+*/
+#include "global.h"
+#include "transform.h"
+#include <emmintrin.h>
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Forward 4x4 integer transform: a horizontal butterfly over each of the
+ *    four rows, followed by a vertical butterfly over each column.
+ *
+ * \param block   source samples; the 4x4 region starting at
+ *                block[pos_y][pos_x] is read
+ * \param tblock  destination; the same 4x4 region (rows pos_y..pos_y+3,
+ *                columns pos_x..pos_x+3) receives the coefficients
+ * \param pos_y   first row of the region
+ * \param pos_x   first column of the region
+ *
+ * \note
+ *    All of block is read into a temporary before tblock is written, so the
+ *    two arguments may refer to the same storage.
+ * \note
+ *    Doubling is written as "* 2" instead of "<< 1": the butterfly terms can
+ *    be negative and left-shifting a negative signed int is undefined in C.
+ ***************************************************************************
+ */
+void forward4x4(int **block, int **tblock, int pos_y, int pos_x)
+{
+  int i, ii;
+  int tmp[16];
+  int *pTmp = tmp, *pblock;
+  int p0,p1,p2,p3;
+  int t0,t1,t2,t3;
+
+  // Horizontal
+  for (i=pos_y; i < pos_y + BLOCK_SIZE; i++)
+  {
+    pblock = &block[i][pos_x];
+    p0 = *(pblock++);
+    p1 = *(pblock++);
+    p2 = *(pblock++);
+    p3 = *(pblock  );
+
+    t0 = p0 + p3;
+    t1 = p1 + p2;
+    t2 = p1 - p2;
+    t3 = p0 - p3;
+
+    *(pTmp++) =  t0 + t1;
+    *(pTmp++) = (t3 * 2) + t2;
+    *(pTmp++) =  t0 - t1;
+    *(pTmp++) =  t3 - (t2 * 2);
+  }
+
+  // Vertical
+  for (i=0; i < BLOCK_SIZE; i++)
+  {
+    pTmp = tmp + i;
+    p0 = *pTmp;
+    p1 = *(pTmp += BLOCK_SIZE);
+    p2 = *(pTmp += BLOCK_SIZE);
+    p3 = *(pTmp += BLOCK_SIZE);
+
+    t0 = p0 + p3;
+    t1 = p1 + p2;
+    t2 = p1 - p2;
+    t3 = p0 - p3;
+
+    ii = pos_x + i;
+    tblock[pos_y    ][ii] = t0 + t1;
+    tblock[pos_y + 1][ii] = t2 + (t3 * 2);
+    tblock[pos_y + 2][ii] = t0 - t1;
+    tblock[pos_y + 3][ii] = t3 - (t2 * 2);
+  }
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Scalar 4x4 inverse transform: a horizontal butterfly over every row of
+ *    tblock, then a vertical butterfly over every column, written to block.
+ *
+ * \param tblock  4x4 input coefficients
+ * \param block   4x4 output; may share storage with tblock because every
+ *                input value is read before the first output is written
+ * \param pos_y   not used by this implementation
+ * \param pos_x   not used by this implementation
+ *
+ * \note
+ *    The intermediate buffer holds shorts, so the first-pass results are
+ *    narrowed to 16 bits before the second pass runs.
+ ***************************************************************************
+ */
+static void inverse4x4(const h264_short_block_t tblock, h264_short_block_t block, int pos_y, int pos_x)
+{
+  short stage[16];   /* first-pass output, row-major, BLOCK_SIZE values per row */
+  short *out = stage;
+  int row, col;
+
+  /* First pass: one butterfly per input row. */
+  for (row = 0; row < BLOCK_SIZE; row++)
+  {
+    const int c0 = tblock[row][0];
+    const int c1 = tblock[row][1];
+    const int c2 = tblock[row][2];
+    const int c3 = tblock[row][3];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = (c1 >> 1) - c3;
+    const int odd_sum   = c1 + (c3 >> 1);
+
+    out[0] = even_sum  + odd_sum;
+    out[1] = even_diff + odd_diff;
+    out[2] = even_diff - odd_diff;
+    out[3] = even_sum  - odd_sum;
+    out += 4;
+  }
+
+  /* Second pass: one butterfly per column of the intermediate buffer. */
+  for (col = 0; col < BLOCK_SIZE; col++)
+  {
+    const int c0 = stage[col];
+    const int c1 = stage[col + BLOCK_SIZE];
+    const int c2 = stage[col + 2 * BLOCK_SIZE];
+    const int c3 = stage[col + 3 * BLOCK_SIZE];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = (c1 >> 1) - c3;
+    const int odd_sum   = c1 + (c3 >> 1);
+
+    block[0][col] = even_sum  + odd_sum;
+    block[1][col] = even_diff + odd_diff;
+    block[2][col] = even_diff - odd_diff;
+    block[3][col] = even_sum  - odd_sum;
+  }
+}
+
+#ifdef _M_IX86
+/* benski> this exists just for conformance testing. not used in production code
+ *
+ * 4x4 inverse transform of the block at (pos_y, pos_x) inside an array with
+ * 16 shorts per row, written as MSVC inline assembly. Only the 64-bit MMX
+ * registers (mm0-mm6) are used, despite the "sse2" in the name.
+ *
+ * The sequence is: load four rows, transpose, butterfly, transpose again,
+ * butterfly, store four rows. Each butterfly is the one spelled out in the
+ * pseudo-code comments below and works on four 16-bit lanes at once with
+ * wrapping adds/subtracts (paddw/psubw).
+ *
+ * All four input rows are loaded before anything is stored, so tblock and
+ * block may point at the same storage.
+ *
+ * NOTE(review): there is no emms in this function; presumably the caller is
+ * expected to clear the MMX state before any x87 floating point - confirm.
+ */
+static void inverse4x4_sse2_x86(const h264_short_macroblock_t tblock, h264_short_macroblock_t block, int pos_y, int pos_x)
+{
+ __asm
+ {
+ mov edx, pos_y
+ shl edx, 4 // 16 step stride
+ add edx, pos_x
+ shl edx, 1 // * sizeof(short)
+
+ // eax: pointer to the start of tblock (offset by passed pos_y, pos_x)
+ mov eax, edx
+ add eax, tblock
+
+ // esi: results (same byte offset applied to block)
+ mov esi, edx
+ add esi, block
+
+ // load 4x4 matrix: four rows of 4 shorts, consecutive rows are 32 bytes (16 shorts) apart
+ movq mm0, MMWORD PTR 0[eax]
+ movq mm1, MMWORD PTR 32[eax]
+ movq mm2, MMWORD PTR 64[eax]
+ movq mm3, MMWORD PTR 96[eax]
+
+ // rotate (transpose) 4x4 matrix: afterwards each register holds one column, i.e. the same coefficient index of all four rows
+ movq mm4, mm0 // p0 = mm4 (copy)
+ punpcklwd mm0, mm2 // r0 = mm0
+ punpckhwd mm4, mm2 // r2 = mm4
+ movq mm5, mm1 // p1 = mm5 (copy)
+ punpcklwd mm1, mm3 // r1 = mm1
+ punpckhwd mm5, mm3 // r3 = mm5
+ movq mm6, mm0 // r0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // r2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+
+ /*
+ first (horizontal) pass, four rows at once:
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+ */
+
+ // rotate 4x4 matrix to set up for vertical (same unpack sequence as the first transpose)
+ movq mm4, mm0 // r0 = mm4 (copy)
+ punpcklwd mm0, mm2 // p0 = mm0
+ punpckhwd mm4, mm2 // p2 = mm4
+ movq mm5, mm1 // r1 = mm5 (copy)
+ punpcklwd mm1, mm3 // p1 = mm1
+ punpckhwd mm5, mm3 // p3 = mm5
+ movq mm6, mm0 // p0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // p2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+ /*
+ second (vertical) pass, same butterfly:
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+
+ The four result rows are stored 32 bytes apart, mirroring the loads.
+ NOTE(review): the stores below say XMMWORD PTR although the source is a
+ 64-bit MMX register; MMWORD PTR (as used for the loads) was probably
+ intended - confirm the assembler treats both as an 8-byte store.
+ */
+ movq XMMWORD PTR 0[esi], mm0
+ movq XMMWORD PTR 32[esi], mm1
+ movq XMMWORD PTR 64[esi], mm2
+ movq XMMWORD PTR 96[esi], mm3
+ }
+}
+#endif
+/*!
+ * Reconstructs one 4x4 block of samples: each residual in tblock is rounded
+ * and shifted down, added to the matching prediction sample in mpr, clipped,
+ * and stored in curImg. The block's top-left corner is row joff, column mb_x
+ * in both curImg and mpr; tblock is indexed from [0][0].
+ *
+ * Portable path (#else): clips to [0, max_imgpel_value] via iClip1 after
+ * rshift_rnd_sf(residual, DQ_BITS) - presumably a rounding right shift;
+ * confirm against its definition.
+ *
+ * x86 path (_M_IX86, MMX inline asm):
+ *  - computes (residual + 32) >> 6 in 16-bit lanes;
+ *  - never reads max_imgpel_value: results are saturated to 0..255 by
+ *    packuswb (the only caller visible in this file passes 255);
+ *  - assumes curImg and mpr rows are 16 bytes apart with one byte per
+ *    sample, and tblock rows are 8 bytes (4 shorts) apart.
+ *  NOTE(review): no emms is issued here; presumably handled by a caller
+ *  before any x87 floating point runs - confirm.
+ */
+static void sample_reconstruct(h264_imgpel_macroblock_t curImg, const h264_imgpel_macroblock_t mpr, const h264_short_block_t tblock, int joff, int mb_x, int max_imgpel_value)
+{
+ #ifdef _M_IX86
+ __asm
+ {
+ // mm0 : constant value 32 in each of the four 16-bit lanes
+ mov edx, 0x00200020
+ movd mm0, edx
+ punpckldq mm0, mm0
+
+ // ecx: byte offset of the block's top-left sample (joff * 16 + mb_x)
+ mov ecx, joff
+ shl ecx, 4 // imgpel stuff is going to be 16 byte stride
+ add ecx, mb_x
+
+ // eax: curImg
+ mov eax, curImg
+ add eax, ecx
+
+ // edx: mpr
+ mov edx, mpr
+ add edx, ecx
+
+ // ecx: tblock (which is short, not byte)
+ mov ecx, tblock
+
+ // mm7: zero
+ pxor mm7, mm7
+
+ // load coefficients, one 4-short row per register
+ movq mm1, MMWORD PTR 0[ecx]
+ movq mm2, MMWORD PTR 8[ecx]
+ movq mm3, MMWORD PTR 16[ecx]
+ movq mm4, MMWORD PTR 24[ecx]
+ paddw mm1, mm0 // rres + 32
+ paddw mm2, mm0 // rres + 32
+ paddw mm3, mm0 // rres + 32
+ paddw mm0, mm4 // rres + 32 (row 3 accumulates into mm0; the constant is no longer needed)
+ psraw mm1, 6 // (rres + 32) >> 6
+ psraw mm2, 6 // (rres + 32) >> 6
+ psraw mm3, 6 // (rres + 32) >> 6
+ psraw mm0, 6 // (rres + 32) >> 6
+ // mm1-mm3: tblock[0] - tblock[2], mm0: tblock[3]
+
+ // convert mpr from unsigned char to short (4 samples per row, rows 16 bytes apart)
+ movd mm4, DWORD PTR 0[edx]
+ movd mm5, DWORD PTR 16[edx]
+ movd mm6, DWORD PTR 32[edx]
+ punpcklbw mm4, mm7
+ punpcklbw mm5, mm7
+ punpcklbw mm6, mm7
+ paddsw mm4, mm1 // pred_row + rres_row
+ movd mm1, DWORD PTR 48[edx] // reuse mm1 for mpr[3]
+ paddsw mm5, mm2 // pred_row + rres_row
+ punpcklbw mm1, mm7
+ paddsw mm6, mm3 // pred_row + rres_row
+ paddsw mm1, mm0 // pred_row + rres_row
+ // results in mm4, mm5, mm6, mm1
+
+ // move back to 8 bit (packuswb saturates each value to 0..255)
+ packuswb mm4, mm7
+ packuswb mm5, mm7
+ packuswb mm6, mm7
+ packuswb mm1, mm7
+ movd DWORD PTR 0[eax], mm4
+ movd DWORD PTR 16[eax], mm5
+ movd DWORD PTR 32[eax], mm6
+ movd DWORD PTR 48[eax], mm1
+ }
+#else
+ int i, j;
+
+ for (j = 0; j < BLOCK_SIZE; j++)
+ {
+ for (i=0;i<BLOCK_SIZE;i++)
+ curImg[j+joff][mb_x+i] = (imgpel) iClip1( max_imgpel_value, rshift_rnd_sf(tblock[j][i], DQ_BITS) + mpr[j+joff][mb_x+i]);
+ }
+#endif
+}
+/*!
+ * x86 debug-build variant: 4x4 inverse transform fused with reconstruction,
+ * written as MSVC inline assembly on the 64-bit MMX registers (mm0-mm7)
+ * despite the "sse2" in the name. Only compiled when both _M_IX86 and
+ * _DEBUG are defined, and not static in that configuration.
+ *
+ * Reads the 4x4 coefficients at row pos_y, column pos_x of tblock (rows
+ * 32 bytes = 16 shorts apart), runs transpose / butterfly / transpose /
+ * butterfly in wrapping 16-bit lanes, then computes
+ * sat8(mb_pred + ((residual + 32) >> 6)) and stores four bytes per row at
+ * the same position of mb_rec. mb_pred and mb_rec are addressed with a
+ * 16-byte row stride and one byte per sample. tblock is not written.
+ *
+ * Note the parameter order is (pos_x, pos_y), whereas the offset is built
+ * as pos_y * 16 + pos_x.
+ *
+ * NOTE(review): no emms is issued; presumably left to the caller - confirm.
+ */
+#if defined(_M_IX86) && defined(_DEBUG)
+void itrans4x4_sse2(const h264_short_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
+{
+ __declspec(align(32)) static const short const32[4] = {32, 32, 32, 32};
+ __asm
+ {
+ mov edx, pos_y
+ shl edx, 4 // imgpel stuff is going to be 16 byte stride
+ add edx, pos_x
+
+ // eax: tblock (element offset doubled because coefficients are shorts)
+ lea eax, [edx*2]
+ add eax, tblock
+
+ // ecx: mpr
+ mov ecx, mb_pred
+ add ecx, edx
+
+ // edx: results
+ add edx, mb_rec
+
+ // load 4x4 matrix
+ movq mm0, MMWORD PTR 0[eax]
+ movq mm1, MMWORD PTR 32[eax]
+ movq mm2, MMWORD PTR 64[eax]
+ movq mm3, MMWORD PTR 96[eax]
+
+ // rotate (transpose) 4x4 matrix
+ movq mm4, mm0 // p0 = mm4 (copy)
+ punpcklwd mm0, mm2 // r0 = mm0
+ punpckhwd mm4, mm2 // r2 = mm4
+ movq mm5, mm1 // p1 = mm5 (copy)
+ punpcklwd mm1, mm3 // r1 = mm1
+ punpckhwd mm5, mm3 // r3 = mm5
+ movq mm6, mm0 // r0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // r2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+
+ /*
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+ */
+
+ // rotate 4x4 matrix to set up for vertical
+ movq mm4, mm0 // r0 = mm4 (copy)
+ punpcklwd mm0, mm2 // p0 = mm0
+ punpckhwd mm4, mm2 // p2 = mm4
+ movq mm5, mm1 // r1 = mm5 (copy)
+ punpcklwd mm1, mm3 // p1 = mm1
+ punpckhwd mm5, mm3 // p3 = mm5
+ movq mm6, mm0 // p0 = mm6 (copy)
+ punpcklwd mm0, mm1 // t0 = mm0
+ punpckhwd mm6, mm1 // t1 = mm6
+ movq mm1, mm4 // p2 = mm1 (copy)
+ punpcklwd mm1, mm5 // t2 = mm1
+ punpckhwd mm4, mm5 // t3 = mm4
+
+ /* register state:
+ mm0: t0
+ mm1: t2
+ mm2:
+ mm3:
+ mm4: t3
+ mm5:
+ mm6: t1
+ mm7:
+ */
+ /*
+ p0 = t0 + t2;
+ p1 = t0 - t2;
+ p2 = (t1 >> 1) - t3;
+ p3 = t1 + (t3 >> 1);
+ */
+ movq mm2, mm0 // mm2 = t0 (copy)
+ paddw mm0, mm1 // mm0 = p0
+ psubw mm2, mm1 // mm2 = p1, mm1 available
+ movq mm5, mm6 // mm5 = t1 (copy)
+ psraw mm5, 1 // mm5 = (t1 >> 1)
+ psubw mm5, mm4 // mm5 = p2
+ psraw mm4, 1 // mm4 = (t3 >> 1)
+ paddw mm6, mm4 // mm6 = p3
+
+ /* register state:
+ mm0: p0
+ mm1:
+ mm2: p1
+ mm3:
+ mm4:
+ mm5: p2
+ mm6: p3
+ mm7:
+ */
+
+ /*
+ *(pTmp++) = p0 + p3;
+ *(pTmp++) = p1 + p2;
+ *(pTmp++) = p1 - p2;
+ *(pTmp++) = p0 - p3;
+ */
+
+ movq mm3, mm0 // mm3 = p0 (copy)
+ paddw mm0, mm6 // mm0 = r0
+ movq mm1, mm2 // mm1 = p1 (copy)
+ paddw mm1, mm5 // mm1 = r1
+ psubw mm2, mm5 // mm2 = r2, mm5 available
+ psubw mm3, mm6 // mm3 = r3
+
+ /* register state:
+ mm0: r0
+ mm1: r1
+ mm2: r2
+ mm3: r3
+ mm4:
+ mm5:
+ mm6:
+ mm7:
+ */
+/* --- 4x4 iDCT done, now time to combine with mpr --- */
+ // mm7 : constant value 32 in each 16-bit lane (mm0-mm3 still hold the residual rows)
+ movq mm7, const32
+
+ paddw mm0, mm7 // rres + 32
+ psraw mm0, 6 // (rres + 32) >> 6
+ paddw mm1, mm7 // rres + 32
+ psraw mm1, 6 // (rres + 32) >> 6
+ paddw mm2, mm7 // rres + 32
+ psraw mm2, 6 // (rres + 32) >> 6
+ paddw mm3, mm7 // rres + 32
+ psraw mm3, 6 // (rres + 32) >> 6
+
+ pxor mm7, mm7 // mm7 becomes zero for the byte<->word conversions below
+
+ // convert mpr from unsigned char to short
+ movd mm4, DWORD PTR 0[ecx]
+ movd mm5, DWORD PTR 16[ecx]
+ movd mm6, DWORD PTR 32[ecx]
+ punpcklbw mm4, mm7
+ punpcklbw mm5, mm7
+ punpcklbw mm6, mm7
+ paddsw mm4, mm0 // pred_row + rres_row
+ movd mm0, DWORD PTR 48[ecx] // reuse mm0 for mpr[3]
+ paddsw mm5, mm1 // pred_row + rres_row
+ punpcklbw mm0, mm7
+ paddsw mm6, mm2 // pred_row + rres_row
+ paddsw mm0, mm3 // pred_row + rres_row
+ // results in mm4, mm5, mm6, mm0
+
+ // move back to 8 bit (packuswb saturates each value to 0..255)
+ packuswb mm4, mm7
+ packuswb mm5, mm7
+ packuswb mm6, mm7
+ packuswb mm0, mm7
+ movd DWORD PTR 0[edx], mm4
+ movd DWORD PTR 16[edx], mm5
+ movd DWORD PTR 32[edx], mm6
+ movd DWORD PTR 48[edx], mm0
+ }
+}
+#elif defined(_M_X64)
+/*!
+ ***************************************************************************
+ * \brief
+ *    x64 variant: 4x4 inverse transform fused with reconstruction, using
+ *    SSE2 intrinsics on 32-bit coefficients.
+ *
+ * \param tblock   coefficient array; the 4x4 region at [pos_y][pos_x] is read
+ * \param mb_pred  prediction samples; four samples are read per row from
+ *                 rows pos_y..pos_y+3 starting at column pos_x
+ * \param mb_rec   reconstructed output, written at the same position
+ * \param pos_x    first column of the block
+ * \param pos_y    first row of the block
+ *
+ * \note
+ *    Prediction and residual are combined with a saturating 16-bit add
+ *    (_mm_adds_epi16), matching the paddsw used by the MMX variants in this
+ *    file. A wrapping add would turn an out-of-range positive residual into
+ *    a negative value and clip it to 0 instead of 255.
+ * \note
+ *    Prediction and output rows are accessed four bytes at a time through
+ *    int32_t pointers - assumes imgpel is a single byte; TODO confirm.
+ ***************************************************************************
+ */
+static void itrans4x4_sse2(const h264_int_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
+{
+  __declspec(align(32)) static const int const32[4] = {32, 32, 32, 32};
+  __m128i p0,p1,p2,p3;
+  __m128i t0,t1,t2,t3;
+  __m128i r0,r1,r2,r3;
+  __m128i c32, zero;
+
+  // horizontal
+  // load registers in vertical mode, we'll rotate them next
+  p0 = _mm_loadu_si128((__m128i *)&tblock[pos_y][pos_x]); // 00 01 02 03
+  p1 = _mm_loadu_si128((__m128i *)&tblock[pos_y+1][pos_x]); // 10 11 12 13
+  p2 = _mm_loadu_si128((__m128i *)&tblock[pos_y+2][pos_x]); // 20 21 22 23
+  p3 = _mm_loadu_si128((__m128i *)&tblock[pos_y+3][pos_x]); // 30 31 32 33
+
+  // rotate 4x4 matrix
+  r0 = _mm_unpacklo_epi32(p0, p2); // 00 20 01 21
+  r1 = _mm_unpacklo_epi32(p1, p3); // 10 30 11 31
+  r2 = _mm_unpackhi_epi32(p0, p2); // 02 22 03 23
+  r3 = _mm_unpackhi_epi32(p1, p3); // 12 32 13 33
+  t0 = _mm_unpacklo_epi32(r0, r1); // 00 10 20 30
+  t1 = _mm_unpackhi_epi32(r0, r1); // 01 11 21 31
+  t2 = _mm_unpacklo_epi32(r2, r3); // 02 12 22 32
+  t3 = _mm_unpackhi_epi32(r2, r3); // 03 13 23 33
+
+  p0 = _mm_add_epi32(t0, t2); // t0 + t2
+  p1 = _mm_sub_epi32(t0, t2); // t0 - t2
+  p2 = _mm_srai_epi32(t1, 1); // t1 >> 1
+  p2 = _mm_sub_epi32(p2, t3); // (t1 >> 1) - t3
+  p3 = _mm_srai_epi32(t3, 1); // t3 >> 1
+  p3 = _mm_add_epi32(p3, t1); // t1 + (t3 >> 1)
+
+  t0 = _mm_add_epi32(p0, p3); // p0 + p3
+  t1 = _mm_add_epi32(p1, p2); // p1 + p2
+  t2 = _mm_sub_epi32(p1, p2); // p1 - p2
+  t3 = _mm_sub_epi32(p0, p3); // p0 - p3
+
+  // rotate 4x4 matrix to set up for vertical
+  r0 = _mm_unpacklo_epi32(t0, t2);
+  r1 = _mm_unpacklo_epi32(t1, t3);
+  r2 = _mm_unpackhi_epi32(t0, t2);
+  r3 = _mm_unpackhi_epi32(t1, t3);
+  t0 = _mm_unpacklo_epi32(r0, r1);
+  t1 = _mm_unpackhi_epi32(r0, r1);
+  t2 = _mm_unpacklo_epi32(r2, r3);
+  t3 = _mm_unpackhi_epi32(r2, r3);
+
+  // vertical
+  p0 = _mm_add_epi32(t0, t2); // t0 + t2
+  p3 = _mm_srai_epi32(t3, 1); // t3 >> 1
+  p3 = _mm_add_epi32(p3, t1); // t1 + (t3 >> 1)
+  r0 = _mm_add_epi32(p0, p3); // p0 + p3
+  r3 = _mm_sub_epi32(p0, p3); // p0 - p3
+  p1 = _mm_sub_epi32(t0, t2); // t0 - t2
+  p2 = _mm_srai_epi32(t1, 1); // t1 >> 1
+  p2 = _mm_sub_epi32(p2, t3); // (t1 >> 1) - t3
+  r1 = _mm_add_epi32(p1, p2); // p1 + p2
+  r2 = _mm_sub_epi32(p1, p2); // p1 - p2
+
+  c32 = _mm_load_si128((const __m128i *)const32);
+  zero = _mm_setzero_si128();
+
+  // (x + 32) >> 6
+  r0 = _mm_add_epi32(r0, c32);
+  r0 = _mm_srai_epi32(r0, 6);
+  r1 = _mm_add_epi32(r1, c32);
+  r1 = _mm_srai_epi32(r1, 6);
+  r2 = _mm_add_epi32(r2, c32);
+  r2 = _mm_srai_epi32(r2, 6);
+  r3 = _mm_add_epi32(r3, c32);
+  r3 = _mm_srai_epi32(r3, 6);
+
+  // convert to 16bit values (signed saturation): r0 = rows 0,1  r2 = rows 2,3
+  r0 = _mm_packs_epi32(r0, r1);
+  r2 = _mm_packs_epi32(r2, r3);
+
+  // convert mpr from unsigned char to short
+  p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y][pos_x]);
+  p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+1][pos_x]);
+  p0 = _mm_unpacklo_epi32(p0, p1);
+  p0 = _mm_unpacklo_epi8(p0, zero); // convert to short
+  r0 = _mm_adds_epi16(r0, p0); // saturating, see \note above
+
+  p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+2][pos_x]);
+  p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+3][pos_x]);
+  p0 = _mm_unpacklo_epi32(p0, p1);
+  p0 = _mm_unpacklo_epi8(p0, zero); // convert to short
+  r2 = _mm_adds_epi16(r2, p0); // saturating, see \note above
+
+  r0 = _mm_packus_epi16(r0, r2); // convert to unsigned char
+  *(int32_t *)&mb_rec[pos_y][pos_x] = _mm_cvtsi128_si32(r0);
+  r0 = _mm_srli_si128(r0, 4);
+  *(int32_t *)&mb_rec[pos_y+1][pos_x] = _mm_cvtsi128_si32(r0);
+  r0 = _mm_srli_si128(r0, 4);
+  *(int32_t *)&mb_rec[pos_y+2][pos_x] = _mm_cvtsi128_si32(r0);
+  r0 = _mm_srli_si128(r0, 4);
+  *(int32_t *)&mb_rec[pos_y+3][pos_x] = _mm_cvtsi128_si32(r0);
+}
+#endif
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    4x4 inverse transform followed by reconstruction into mb_rec.
+ *
+ * \param tblock   4x4 coefficients; overwritten with the inverse-transformed
+ *                 residual (the const qualifier is cast away for that)
+ * \param mb_pred  prediction samples
+ * \param mb_rec   reconstructed output
+ * \param pos_x    column of the block inside mb_pred / mb_rec
+ * \param pos_y    row of the block inside mb_pred / mb_rec
+ *
+ * \note
+ *    The maximum sample value handed to sample_reconstruct is fixed at 255.
+ ***************************************************************************
+ */
+void itrans4x4_c(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
+{
+  /* The transform runs in place: input and output are the same storage. */
+  h264_short_block_row_t *residual = (h264_short_block_row_t *)tblock;
+
+  inverse4x4(tblock, residual, pos_y, pos_x);
+  sample_reconstruct(mb_rec, mb_pred, tblock, pos_y, pos_x, 255);
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    In-place 4x4 inverse Hadamard transform: add/subtract butterfly over
+ *    each row, then over each column.
+ *
+ * \param block  4x4 values, replaced by the transformed values
+ ***************************************************************************
+ */
+void ihadamard4x4(int block[4][4])
+{
+  int stage[16];   /* row-pass output, row-major, BLOCK_SIZE values per row */
+  int *out = stage;
+  int row, col;
+
+  /* Row pass */
+  for (row = 0; row < BLOCK_SIZE; row++)
+  {
+    const int c0 = block[row][0];
+    const int c1 = block[row][1];
+    const int c2 = block[row][2];
+    const int c3 = block[row][3];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = c1 - c3;
+    const int odd_sum   = c1 + c3;
+
+    out[0] = even_sum  + odd_sum;
+    out[1] = even_diff + odd_diff;
+    out[2] = even_diff - odd_diff;
+    out[3] = even_sum  - odd_sum;
+    out += 4;
+  }
+
+  /* Column pass */
+  for (col = 0; col < BLOCK_SIZE; col++)
+  {
+    const int c0 = stage[col];
+    const int c1 = stage[col + BLOCK_SIZE];
+    const int c2 = stage[col + 2 * BLOCK_SIZE];
+    const int c3 = stage[col + 3 * BLOCK_SIZE];
+
+    const int even_sum  = c0 + c2;
+    const int even_diff = c0 - c2;
+    const int odd_diff  = c1 - c3;
+    const int odd_sum   = c1 + c3;
+
+    block[0][col] = even_sum  + odd_sum;
+    block[1][col] = even_diff + odd_diff;
+    block[2][col] = even_diff - odd_diff;
+    block[3][col] = even_sum  - odd_sum;
+  }
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Inverse Hadamard transform of a 2-row by 4-column input, written out
+ *    transposed as 4 rows by 2 columns.
+ *
+ * \param tblock  input: rows 0 and 1, columns 0..3 are read
+ * \param block   output: rows 0..3, columns 0 and 1 are written
+ ***************************************************************************
+ */
+void ihadamard4x2(int **tblock, int **block)
+{
+  int stage[2][4];   /* [0] = row0 + row1, [1] = row0 - row1 */
+  int k, col;
+
+  /* 2-point pass: sums first, then differences */
+  for (col = 0; col < 4; col++)
+    stage[0][col] = tblock[0][col] + tblock[1][col];
+
+  for (col = 0; col < 4; col++)
+    stage[1][col] = tblock[0][col] - tblock[1][col];
+
+  /* 4-point pass over each stage row */
+  for (k = 0; k < 2; k++)
+  {
+    const int *s = stage[k];
+    const int even_sum  = s[0] + s[2];
+    const int even_diff = s[0] - s[2];
+    const int odd_diff  = s[1] - s[3];
+    const int odd_sum   = s[1] + s[3];
+
+    // coefficients (transposed)
+    block[0][k] = even_sum  + odd_sum;
+    block[1][k] = even_diff + odd_diff;
+    block[2][k] = even_diff - odd_diff;
+    block[3][k] = even_sum  - odd_sum;
+  }
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    2x2 inverse Hadamard transform on four values, done with eight
+ *    additions/subtractions.
+ *
+ * \param tblock  four input values
+ * \param block   four output values; may be the same array as tblock since
+ *                every input is read before the first output is written
+ ***************************************************************************
+ */
+void ihadamard2x2(int tblock[4], int block[4])
+{
+  const int sum01  = tblock[0] + tblock[1];
+  const int diff01 = tblock[0] - tblock[1];
+  const int sum23  = tblock[2] + tblock[3];
+  const int diff23 = tblock[2] - tblock[3];
+
+  block[0] = sum01  + sum23;
+  block[1] = diff01 + diff23;
+  block[2] = sum01  - sum23;
+  block[3] = diff01 - diff23;
+}
+
diff --git a/Src/h264dec/lcommon/src/win32.c b/Src/h264dec/lcommon/src/win32.c
new file mode 100644
index 00000000..7d921e1e
--- /dev/null
+++ b/Src/h264dec/lcommon/src/win32.c
@@ -0,0 +1,67 @@
+
+/*!
+ *************************************************************************************
+ * \file win32.c
+ *
+ * \brief
+ * Platform dependent code
+ *
+ * \author
+ * Main contributors (see contributors.h for copyright, address and affiliation details)
+ * - Karsten Suehring <suehring@hhi.de>
+ *************************************************************************************
+ */
+
+#include "global.h"
+#include <stddef.h>
+
+
+#ifdef _WIN32
+
+static LARGE_INTEGER freq; // performance-counter frequency; filled in by timenorm() on its first call
+
+/*!
+ * \brief
+ *    Store the current performance-counter reading in *time.
+ *    The result of QueryPerformanceCounter is deliberately not checked.
+ */
+void gettime(TIME_T* time)
+{
+  (void) QueryPerformanceCounter(time);
+}
+
+/*!
+ * \brief
+ *    Difference between two counter readings (end - start), in raw
+ *    performance-counter ticks. Use timenorm() to scale the result.
+ */
+int64 timediff(TIME_T* start, TIME_T* end)
+{
+  const int64 elapsed_ticks = (int64)(end->QuadPart - start->QuadPart);
+
+  return elapsed_ticks;
+}
+
+/*!
+ * \brief
+ *    Scale a tick count to cur_time * 1000 / frequency, i.e. milliseconds
+ *    when cur_time is in performance-counter ticks.
+ *
+ * \note
+ *    The counter frequency is queried once, on the first call, and cached in
+ *    the file-level freq variable.
+ */
+int64 timenorm(int64 cur_time)
+{
+  static int freq_pending = 1;
+
+  if (freq_pending)
+  {
+    QueryPerformanceFrequency(&freq);
+    freq_pending = 0;
+  }
+
+  return (int64)(cur_time * 1000 / (freq.QuadPart));
+}
+
+#else
+
+/*!
+ * \brief
+ *    Store the current wall-clock time in *time using gettimeofday().
+ *
+ * \note
+ *    The timezone argument is passed as NULL: POSIX leaves the behaviour
+ *    unspecified for a non-null pointer, and the value it used to fill in
+ *    was never read anywhere in this file.
+ */
+void gettime(TIME_T* time)
+{
+  gettimeofday(time, NULL);
+}
+
+/*!
+ * \brief
+ *    Elapsed time from *start to *end in microseconds.
+ *
+ * \note
+ *    Both differences are computed directly in int64, so the seconds and
+ *    microseconds fields are never narrowed to int on the way.
+ */
+int64 timediff(TIME_T* start, TIME_T* end)
+{
+  const int64 sec_delta  = (int64) end->tv_sec  - (int64) start->tv_sec;
+  const int64 usec_delta = (int64) end->tv_usec - (int64) start->tv_usec;
+
+  return usec_delta + sec_delta * (int64) 1000000;
+}
+
+/*!
+ * \brief
+ *    Divide a time value by 1000 (integer division); turns the microseconds
+ *    returned by timediff() into milliseconds.
+ */
+int64 timenorm(int64 cur_time)
+{
+  const int64 divisor = (int64) 1000;
+
+  return (int64)(cur_time / divisor);
+}
+#endif