diff options
Diffstat (limited to 'Src/h264dec/ldecod/src/intra16x16_pred.c')
-rw-r--r-- | Src/h264dec/ldecod/src/intra16x16_pred.c | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/Src/h264dec/ldecod/src/intra16x16_pred.c b/Src/h264dec/ldecod/src/intra16x16_pred.c new file mode 100644 index 00000000..a2b89ca2 --- /dev/null +++ b/Src/h264dec/ldecod/src/intra16x16_pred.c @@ -0,0 +1,428 @@ +/*! + ************************************************************************************* + * \file intra16x16_pred.c + * + * \brief + * Functions for intra 8x8 prediction + * + * \author + * Main contributors (see contributors.h for copyright, + * address and affiliation details) + * - Yuri Vatis + * - Jan Muenster + * - Alexis Michael Tourapis <alexismt@ieee.org> + * + ************************************************************************************* + */ +#include "global.h" +#include "intra16x16_pred.h" +#include "mb_access.h" +#include "image.h" +#include <emmintrin.h> +static void memset_16x16(h264_imgpel_macroblock_row_t *mb_pred, int pred) +{ + if (sse2_flag) + { + __m128i xmm_pred = _mm_set1_epi8(pred); + int i; + __m128i *xmm_macroblock = (__m128i *)mb_pred; + for (i=0;i<16;i++) + { + _mm_store_si128(xmm_macroblock++, xmm_pred); + } + } +#ifdef _M_IX86 + else + { + __m64 mmx_pred = _mm_set1_pi8(pred); + int i; + __m64 *mmx_macroblock = (__m64 *)mb_pred; + for (i=0;i<16;i++) + { + *mmx_macroblock++ = mmx_pred; + *mmx_macroblock++ = mmx_pred; + } + } +#else + else + { + int ii, jj; + for (jj = 0; jj < MB_BLOCK_SIZE; jj++) + { + for (ii = 0; ii < MB_BLOCK_SIZE; ii++) + { + mb_pred[jj][ii]=(imgpel) pred; + } + } + } + #endif +} + +/*! + *********************************************************************** + * \brief + * makes and returns 16x16 DC prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra16x16_dc_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int s0 = 0, s1 = 0, s2 = 0; + + int i; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int up_avail, left_avail, left_up_avail; + + s1=s2=0; + + p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]); + p_Vid->getNeighbourLeftLuma(currMB, &left[1]); + p_Vid->getNeighbourNPLumaNB(currMB, 2-1, &left[2]); + p_Vid->getNeighbourNPLumaNB(currMB, 3-1, &left[3]); + p_Vid->getNeighbourNPLumaNB(currMB, 4-1, &left[4]); + p_Vid->getNeighbourNPLumaNB(currMB, 5-1, &left[5]); + p_Vid->getNeighbourNPLumaNB(currMB, 6-1, &left[6]); + p_Vid->getNeighbourNPLumaNB(currMB, 7-1, &left[7]); + p_Vid->getNeighbourNPLumaNB(currMB, 8-1, &left[8]); + p_Vid->getNeighbourNPLumaNB(currMB, 9-1, &left[9]); + p_Vid->getNeighbourNPLumaNB(currMB, 10-1, &left[10]); + p_Vid->getNeighbourNPLumaNB(currMB, 11-1, &left[11]); + p_Vid->getNeighbourNPLumaNB(currMB, 12-1, &left[12]); + p_Vid->getNeighbourNPLumaNB(currMB, 13-1, &left[13]); + p_Vid->getNeighbourNPLumaNB(currMB, 14-1, &left[14]); + p_Vid->getNeighbourNPLumaNB(currMB, 15-1, &left[15]); + p_Vid->getNeighbourNPLumaNB(currMB, 16-1, &left[16]); + + p_Vid->getNeighbourUpLuma(currMB, &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0; + for (i = 1, left_avail = 1; i < 17; ++i) + left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0; + } + + if (up_avail) + { + s1 += imgY[up.pos_y][up.pos_x+0]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+1]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+2]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+3]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+4]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+5]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+6]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+7]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+8]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+9]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+10]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+11]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+12]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+13]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+14]; // sum hor pix + s1 += imgY[up.pos_y][up.pos_x+15]; // sum hor pix + } + + if (left_avail) + { + s2 += imgY[left[0 + 1].pos_y][left[0 + 1].pos_x]; // sum vert pix + s2 += imgY[left[1 + 1].pos_y][left[1 + 1].pos_x]; // sum vert pix + s2 += imgY[left[2 + 1].pos_y][left[2 + 1].pos_x]; // sum vert pix + s2 += imgY[left[3 + 1].pos_y][left[3 + 1].pos_x]; // sum vert pix + s2 += imgY[left[4 + 1].pos_y][left[4 + 1].pos_x]; // sum vert pix + s2 += imgY[left[5 + 1].pos_y][left[5 + 1].pos_x]; // sum vert pix + s2 += imgY[left[6 + 1].pos_y][left[6 + 1].pos_x]; // sum vert pix + s2 += imgY[left[7 + 1].pos_y][left[7 + 1].pos_x]; // sum vert pix + s2 += imgY[left[8 + 1].pos_y][left[8 + 1].pos_x]; // sum vert pix + s2 += imgY[left[9 + 1].pos_y][left[9 + 1].pos_x]; // sum vert pix + s2 += imgY[left[10 + 1].pos_y][left[10 + 1].pos_x]; // sum vert pix + s2 += imgY[left[11 + 1].pos_y][left[11 + 1].pos_x]; // sum vert pix + s2 += imgY[left[12 + 1].pos_y][left[12 + 1].pos_x]; // sum vert pix + s2 += imgY[left[13 + 1].pos_y][left[13 + 1].pos_x]; // sum vert pix + s2 += imgY[left[14 + 1].pos_y][left[14 + 1].pos_x]; // sum vert pix + s2 += imgY[left[15 + 1].pos_y][left[15 + 1].pos_x]; // sum vert pix + } + + if (up_avail && left_avail) + s0 = (s1 + s2 + 16)>>5; // no edge + else if (!up_avail && left_avail) + s0 = (s2 + 8)>>4; // upper edge + else if (up_avail && !left_avail) + s0 = (s1 + 8)>>4; // left edge + else + s0 = p_Vid->dc_pred_value_comp[pl]; // top left corner, nothing to predict from + + memset_16x16(currSlice->mb_pred[pl], s0); + + return DECODING_OK; +} + + +/*! + *********************************************************************** + * \brief + * makes and returns 16x16 vertical prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra16x16_vert_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int j; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + + PixelPos up; //!< pixel position p(0,-1) + + int up_avail; + + p_Vid->getNeighbourUpLuma(currMB, &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + } + else + { + up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0; + } + + if (!up_avail) + error ("invalid 16x16 intra pred Mode VERT_PRED_16",500); + + for(j=0;j<MB_BLOCK_SIZE;++j) + { + // TODO; take advantage of imgY's stride + memcpy(&currSlice->mb_pred[pl][j][0], &(imgY[up.pos_y][up.pos_x]), MB_BLOCK_SIZE * sizeof(imgpel)); + } + + return DECODING_OK; +} + + +/*! + *********************************************************************** + * \brief + * makes and returns 16x16 horizontal prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static inline int intra16x16_hor_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int i,j; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + imgpel prediction; + + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int left_avail, left_up_avail; + + for (i=0;i<17;++i) + { + p_Vid->getNeighbourNXLuma(currMB, i-1, &left[i]); + } + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + for (i = 1, left_avail = 1; i < 17; ++i) + left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0; + } + + if (!left_avail) + error ("invalid 16x16 intra pred Mode HOR_PRED_16",500); + + for(j = 0; j < MB_BLOCK_SIZE; ++j) + { + prediction = imgY[left[j+1].pos_y][left[j+1].pos_x]; + for(i = 0; i < MB_BLOCK_SIZE; ++i) + currSlice->mb_pred[pl][j][i]= prediction; // store predicted 16x16 block + } + + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 16x16 horizontal prediction mode + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * + *********************************************************************** + */ +static void planeset(h264_imgpel_macroblock_row_t *dest, int iaa, int ib, int ic) +{ + int j; + __m128i i0_7 = _mm_setr_epi16(-7,-6,-5,-4,-3,-2,-1, 0); + __m128i i8_15 = _mm_setr_epi16(1,2,3,4,5,6,7,8); + __m128i xmm_ib = _mm_set1_epi16(ib); + int j7ic = iaa + -7 * ic + 16; + i0_7 = _mm_mullo_epi16(i0_7, xmm_ib); + i8_15 = _mm_mullo_epi16(i8_15, xmm_ib); + for (j = 0;j < MB_BLOCK_SIZE; ++j) + { + __m128i xmm_j7ic = _mm_set1_epi16(j7ic); + __m128i xmm_lo = _mm_add_epi16(i0_7, xmm_j7ic); + __m128i xmm_hi = _mm_add_epi16(i8_15, xmm_j7ic); + __m128i xmm_store; + xmm_lo = _mm_srai_epi16(xmm_lo, 5); + xmm_hi = _mm_srai_epi16(xmm_hi, 5); + xmm_store = _mm_packus_epi16(xmm_lo, xmm_hi); + _mm_store_si128((__m128i *)dest[j], xmm_store); + j7ic += ic; + }// store plane prediction +} + +static inline int intra16x16_plane_pred(Macroblock *currMB, + ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int i,j; + + int ih = 0, iv = 0; + int ib,ic,iaa; + + imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img; + imgpel *mpr_line; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int up_avail, left_avail, left_up_avail; + + p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]); + p_Vid->getNeighbourLeftLuma(currMB, &left[1]); + for (i=2;i<17; ++i) + { + p_Vid->getNeighbourNPLumaNB(currMB, i-1, &left[i]); + } + p_Vid->getNeighbourUpLuma(currMB, &up); + + if (!p_Vid->active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0; + for (i = 1, left_avail = 1; i < 17; ++i) + left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0; + } + + if (!up_avail || !left_up_avail || !left_avail) + error ("invalid 16x16 intra pred Mode PLANE_16",500); + + mpr_line = &imgY[up.pos_y][up.pos_x+7]; + for (i = 1; i < 8; ++i) + { + ih += i*(mpr_line[i] - mpr_line[-i]); + iv += i*(imgY[left[8+i].pos_y][left[8+i].pos_x] - imgY[left[8-i].pos_y][left[8-i].pos_x]); + } + + ih += 8*(mpr_line[8] - imgY[left[0].pos_y][left[0].pos_x]); + iv += 8*(imgY[left[16].pos_y][left[16].pos_x] - imgY[left[0].pos_y][left[0].pos_x]); + + ib=(5 * ih + 32)>>6; + ic=(5 * iv + 32)>>6; + + iaa=16 * (mpr_line[8] + imgY[left[16].pos_y][left[16].pos_x]); + if (sse2_flag) + { + planeset(currSlice->mb_pred[pl], iaa, ib, ic); + } + else + { + // TODO: MMX + for (j = 0;j < MB_BLOCK_SIZE; ++j) + { + int j7ic = iaa + (j - 7) * ic + 16; + for (i = 0;i < MB_BLOCK_SIZE; ++i) + { + currSlice->mb_pred[pl][j][i] = (imgpel) iClip1(max_imgpel_value, (((i - 7) * ib + j7ic) >> 5)); + } + }// store plane prediction + } + return DECODING_OK; +} + +/*! + *********************************************************************** + * \brief + * makes and returns 16x16 intra prediction blocks + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * SEARCH_SYNC search next sync element as errors while decoding occured + *********************************************************************** + */ +// TODO: replace with ippiPredictIntra_16x16_H264_8u_C1IR ? +int intrapred16x16(Macroblock *currMB, //!< Current Macroblock + ColorPlane pl, //!< Current colorplane (for 4:4:4) + int predmode) //!< prediction mode +{ + switch (predmode) + { + case VERT_PRED_16: // vertical prediction from block above + return (intra16x16_vert_pred(currMB, pl)); + break; + case HOR_PRED_16: // horizontal prediction from left block + return (intra16x16_hor_pred(currMB, pl)); + break; + case DC_PRED_16: // DC prediction + return (intra16x16_dc_pred(currMB, pl)); + break; + case PLANE_16:// 16 bit integer plan pred + return (intra16x16_plane_pred(currMB, pl)); + break; + default: + { // indication of fault in bitstream,exit + printf("illegal 16x16 intra prediction mode input: %d\n",predmode); + return SEARCH_SYNC; + } + } +} + |