diff options
Diffstat (limited to 'Src/h264dec/ldecod/src/block.c')
-rw-r--r-- | Src/h264dec/ldecod/src/block.c | 929 |
1 files changed, 929 insertions, 0 deletions
diff --git a/Src/h264dec/ldecod/src/block.c b/Src/h264dec/ldecod/src/block.c new file mode 100644 index 00000000..d048f956 --- /dev/null +++ b/Src/h264dec/ldecod/src/block.c @@ -0,0 +1,929 @@ + +/*! + *********************************************************************** + * \file + * block.c + * + * \brief + * Block functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langoy <inge.lille-langoy@telenor.com> + * - Rickard Sjoberg <rickard.sjoberg@era.ericsson.se> + *********************************************************************** + */ + +#include "contributors.h" + +#include "global.h" +#include "block.h" +#include "image.h" +#include "mb_access.h" +#include "transform.h" +#include "quant.h" +#include "memalloc.h" +#include "optim.h" + + +/*! + **************************************************************************** + * \brief + * Inverse 4x4 lossless_qpprime transformation, transforms cof to mb_rres + **************************************************************************** + */ +void itrans4x4_ls(const h264_short_block_row_t *tblock, + const h264_imgpel_macroblock_row_t *mb_pred, + h264_imgpel_macroblock_row_t *mb_rec, + int ioff, //!< index to 4x4 block + int joff) //!< index to 4x4 block +{ + int i,j; + for (j = 0; j < BLOCK_SIZE; ++j) + { + for (i = 0; i < BLOCK_SIZE; ++i) + { + mb_rec[j+joff][i+ioff] = (imgpel) iClip1(255/*max_imgpel_value*/, mb_pred[j+joff][i+ioff] + tblock[j][i]); + } + } +} + + +/*! +************************************************************************ +* \brief +* Inverse residual DPCM for Intra lossless coding +* +************************************************************************ +*/ +void Inv_Residual_trans_4x4(Macroblock *currMB, //!< current macroblock + ColorPlane pl, //!< used color plane + int ioff, //!< index to 4x4 block + int joff) //!< index to 4x4 block +{ + int i,j; + h264_short_block_t temp; + Slice *currSlice = currMB->p_Slice; + int subblock = cof4_pos_to_subblock[joff>>2][ioff>>2]; + + h264_short_block_row_t *tblock = currSlice->cof4[pl][subblock]; + + if(currMB->ipmode_DPCM == VERT_PRED) + { + for(i=0; i<4; ++i) + { + temp[0][i] = tblock[0][i]; + temp[1][i] = tblock[1][i] + temp[0][i]; + temp[2][i] = tblock[2][i] + temp[1][i]; + temp[3][i] = tblock[3][i] + temp[2][i]; + } + } + else if(currMB->ipmode_DPCM == HOR_PRED) + { + for(j=0; j<4; ++j) + { + temp[j][0] = tblock[j][0]; + temp[j][1] = tblock[j][1] + temp[j][0]; + temp[j][2] = tblock[j][2] + temp[j][1]; + temp[j][3] = tblock[j][3] + temp[j][2]; + } + } + else + { + for (j = 0; j < BLOCK_SIZE; ++j) + for (i = 0; i < BLOCK_SIZE; ++i) + temp[j][i] = tblock[j][i]; + } + + for (j = 0; j < BLOCK_SIZE; ++j) + { + for (i = 0; i < BLOCK_SIZE; ++i) + { + currSlice->mb_rec[pl][j+joff][i+ioff] = (imgpel) (temp[j][i] + currSlice->mb_pred[pl][j+joff][i+ioff]); + } + } +} + +/*! +************************************************************************ +* \brief +* Inverse residual DPCM for Intra lossless coding +* +* \par Input: +* ioff_x,joff_y: Block position inside a macro block (0,8). +************************************************************************ +*/ +//For residual DPCM +void Inv_Residual_trans_8x8(Macroblock *currMB, ColorPlane pl, int ioff,int joff) +{ + Slice *currSlice = currMB->p_Slice; + int i, j; + h264_short_8x8block_t temp; + + int block = (joff>>2) + (ioff>>3); + + if(currMB->ipmode_DPCM == VERT_PRED) + { + for(i=0; i<8; ++i) + { + temp[0][i] = currSlice->mb_rres8[pl][block][0][i]; + temp[1][i] = currSlice->mb_rres8[pl][block][1][i] + temp[0][i]; + temp[2][i] = currSlice->mb_rres8[pl][block][2][i] + temp[1][i]; + temp[3][i] = currSlice->mb_rres8[pl][block][3][i] + temp[2][i]; + temp[4][i] = currSlice->mb_rres8[pl][block][4][i] + temp[3][i]; + temp[5][i] = currSlice->mb_rres8[pl][block][5][i] + temp[4][i]; + temp[6][i] = currSlice->mb_rres8[pl][block][6][i] + temp[5][i]; + temp[7][i] = currSlice->mb_rres8[pl][block][7][i] + temp[6][i]; + } + for(i=0; i<8; ++i) + { + currSlice->mb_rres8[pl][block][0][i]=temp[0][i]; + currSlice->mb_rres8[pl][block][1][i]=temp[1][i]; + currSlice->mb_rres8[pl][block][2][i]=temp[2][i]; + currSlice->mb_rres8[pl][block][3][i]=temp[3][i]; + currSlice->mb_rres8[pl][block][4][i]=temp[4][i]; + currSlice->mb_rres8[pl][block][5][i]=temp[5][i]; + currSlice->mb_rres8[pl][block][6][i]=temp[6][i]; + currSlice->mb_rres8[pl][block][7][i]=temp[7][i]; + } + } + else if(currMB->ipmode_DPCM == HOR_PRED)//HOR_PRED + { + for(i=0; i<8; ++i) + { + temp[i][0] = currSlice->mb_rres8[pl][block][i][0]; + temp[i][1] = currSlice->mb_rres8[pl][block][i][1] + temp[i][0]; + temp[i][2] = currSlice->mb_rres8[pl][block][i][2] + temp[i][1]; + temp[i][3] = currSlice->mb_rres8[pl][block][i][3] + temp[i][2]; + temp[i][4] = currSlice->mb_rres8[pl][block][i][4] + temp[i][3]; + temp[i][5] = currSlice->mb_rres8[pl][block][i][5] + temp[i][4]; + temp[i][6] = currSlice->mb_rres8[pl][block][i][6] + temp[i][5]; + temp[i][7] = currSlice->mb_rres8[pl][block][i][7] + temp[i][6]; + } + for(i=0; i<8; ++i) + { + currSlice->mb_rres8[pl][block][i][0]=temp[i][0]; + currSlice->mb_rres8[pl][block][i][1]=temp[i][1]; + currSlice->mb_rres8[pl][block][i][2]=temp[i][2]; + currSlice->mb_rres8[pl][block][i][3]=temp[i][3]; + currSlice->mb_rres8[pl][block][i][4]=temp[i][4]; + currSlice->mb_rres8[pl][block][i][5]=temp[i][5]; + currSlice->mb_rres8[pl][block][i][6]=temp[i][6]; + currSlice->mb_rres8[pl][block][i][7]=temp[i][7]; + } + } + + for (j = 0; j < BLOCK_SIZE_8x8; ++j) + { + for (i = 0; i < BLOCK_SIZE_8x8; ++i) + { + currSlice->mb_rec[pl][joff+j][ioff+i] = (imgpel) (currSlice->mb_rres8[pl][block][j][i] + currSlice->mb_pred[pl][joff+j][ioff+i]); + } + } +} + +/*! + *********************************************************************** + * \brief + * Luma DC inverse transform + *********************************************************************** + */ +void itrans_2(Macroblock *currMB, ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + int transform_pl = IS_INDEPENDENT(p_Vid) ? PLANE_Y /*p_Vid->colour_plane_id*/ : pl; + h264_short_block_t *blocks = currSlice->cof4[transform_pl]; + int qp_scaled = currMB->qp_scaled[pl]; + + int qp_per = p_Vid->qp_per_matrix[ qp_scaled ]; + int qp_rem = p_Vid->qp_rem_matrix[ qp_scaled ]; + + int invLevelScale = currSlice->InvLevelScale4x4_Intra[pl][qp_rem][0][0]; + h264_int_block_t M4; + + // horizontal + M4[0][0]=blocks[0][0][0]; + M4[0][1]=blocks[1][0][0]; + M4[0][2]=blocks[4][0][0]; + M4[0][3]=blocks[5][0][0]; + M4[1][0]=blocks[2][0][0]; + M4[1][1]=blocks[3][0][0]; + M4[1][2]=blocks[6][0][0]; + M4[1][3]=blocks[7][0][0]; + M4[2][0]=blocks[8][0][0]; + M4[2][1]=blocks[9][0][0]; + M4[2][2]=blocks[12][0][0]; + M4[2][3]=blocks[13][0][0]; + M4[3][0]=blocks[10][0][0]; + M4[3][1]=blocks[11][0][0]; + M4[3][2]=blocks[14][0][0]; + M4[3][3]=blocks[15][0][0]; + + ihadamard4x4(M4); + + // vertical + blocks[0][0][0] = rshift_rnd((( M4[0][0] * invLevelScale) << qp_per), 6); + blocks[1][0][0] = rshift_rnd((( M4[0][1] * invLevelScale) << qp_per), 6); + blocks[4][0][0] = rshift_rnd((( M4[0][2] * invLevelScale) << qp_per), 6); + blocks[5][0][0] = rshift_rnd((( M4[0][3] * invLevelScale) << qp_per), 6); + blocks[2][0][0] = rshift_rnd((( M4[1][0] * invLevelScale) << qp_per), 6); + blocks[3][0][0] = rshift_rnd((( M4[1][1] * invLevelScale) << qp_per), 6); + blocks[6][0][0] = rshift_rnd((( M4[1][2] * invLevelScale) << qp_per), 6); + blocks[7][0][0] = rshift_rnd((( M4[1][3] * invLevelScale) << qp_per), 6); + blocks[8][0][0] = rshift_rnd((( M4[2][0] * invLevelScale) << qp_per), 6); + blocks[9][0][0] = rshift_rnd((( M4[2][1] * invLevelScale) << qp_per), 6); + blocks[12][0][0] = rshift_rnd((( M4[2][2] * invLevelScale) << qp_per), 6); + blocks[13][0][0] = rshift_rnd((( M4[2][3] * invLevelScale) << qp_per), 6); + blocks[10][0][0] = rshift_rnd((( M4[3][0] * invLevelScale) << qp_per), 6); + blocks[11][0][0] = rshift_rnd((( M4[3][1] * invLevelScale) << qp_per), 6); + blocks[14][0][0] = rshift_rnd((( M4[3][2] * invLevelScale) << qp_per), 6); + blocks[15][0][0] = rshift_rnd((( M4[3][3] * invLevelScale) << qp_per), 6); +} + + +void itrans_sp(h264_short_block_row_t *tblock, const h264_imgpel_macroblock_row_t *mb_pred, Macroblock *currMB, ColorPlane pl, int ioff, int joff) +{ + VideoParameters *p_Vid = currMB->p_Vid; + Slice *currSlice = currMB->p_Slice; + int i,j; + int ilev, icof; + + int qp = (currSlice->slice_type == SI_SLICE) ? currSlice->qs : p_Vid->qp; + int qp_per = p_Vid->qp_per_matrix[ qp ]; + int qp_rem = p_Vid->qp_rem_matrix[ qp ]; + + int qp_per_sp = p_Vid->qp_per_matrix[ currSlice->qs ]; + int qp_rem_sp = p_Vid->qp_rem_matrix[ currSlice->qs ]; + int q_bits_sp = Q_BITS + qp_per_sp; + int max_imgpel_value = p_Vid->max_pel_value_comp[pl]; + + const int (*InvLevelScale4x4) [4] = dequant_coef[qp_rem]; + const int (*InvLevelScale4x4SP)[4] = dequant_coef[qp_rem_sp]; + int **PBlock; + + get_mem2Dint(&PBlock, MB_BLOCK_SIZE, MB_BLOCK_SIZE); + + for (j=0; j< BLOCK_SIZE; ++j) + for (i=0; i< BLOCK_SIZE; ++i) + PBlock[j][i] = mb_pred[j+joff][i+ioff]; + + forward4x4(PBlock, PBlock, 0, 0); + + if(p_Vid->sp_switch || currSlice->slice_type==SI_SLICE) + { + for (j=0;j<BLOCK_SIZE;++j) + { + for (i=0;i<BLOCK_SIZE;++i) + { + // recovering coefficient since they are already dequantized earlier + icof = (tblock[j][i] >> qp_per) / InvLevelScale4x4[j][i]; + ilev = rshift_rnd_sf(iabs(PBlock[j][i]) * quant_coef[qp_rem_sp][j][i], q_bits_sp); + ilev = isignab(ilev, PBlock[j][i]) + icof; + tblock[j][i] = ilev * InvLevelScale4x4SP[j][i] << qp_per_sp; + } + } + } + else + { + for (j=0;j<BLOCK_SIZE;++j) + { + for (i=0;i<BLOCK_SIZE;++i) + { + // recovering coefficient since they are already dequantized earlier + icof = (tblock[j][i] >> qp_per) / InvLevelScale4x4[j][i]; + ilev = PBlock[j][i] + ((icof * InvLevelScale4x4[j][i] * A[j][i] << qp_per) >> 6); + ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][j][i], q_bits_sp); + tblock[j][i] = ilev * InvLevelScale4x4SP[j][i] << qp_per_sp; + } + } + } + + { + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + opt_itrans4x4(tblock, mb_pred, mb_rec, ioff, joff); + } + + free_mem2Dint(PBlock); +} + +void itrans_sp_cr(Macroblock *currMB, int uv) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + int i,j,ilev, icof, n2,n1; + int mp1[BLOCK_SIZE]; + int qp_per,qp_rem; + int qp_per_sp,qp_rem_sp,q_bits_sp; + int **PBlock; + + get_mem2Dint(&PBlock, MB_BLOCK_SIZE, MB_BLOCK_SIZE); + + + qp_per = p_Vid->qp_per_matrix[ ((p_Vid->qp < 0 ? p_Vid->qp : QP_SCALE_CR[p_Vid->qp]))]; + qp_rem = p_Vid->qp_rem_matrix[ ((p_Vid->qp < 0 ? p_Vid->qp : QP_SCALE_CR[p_Vid->qp]))]; + + qp_per_sp = p_Vid->qp_per_matrix[ ((currSlice->qs < 0 ? currSlice->qs : QP_SCALE_CR[currSlice->qs]))]; + qp_rem_sp = p_Vid->qp_rem_matrix[ ((currSlice->qs < 0 ? currSlice->qs : QP_SCALE_CR[currSlice->qs]))]; + q_bits_sp = Q_BITS + qp_per_sp; + + if (currSlice->slice_type == SI_SLICE) + { + qp_per = qp_per_sp; + qp_rem = qp_rem_sp; + } + + for (j=0; j < p_Vid->mb_cr_size_y; ++j) + { + for (i=0; i < p_Vid->mb_cr_size_x; ++i) + { + PBlock[j][i] = currSlice->mb_pred[uv + 1][j][i]; + currSlice->mb_pred[uv + 1][j][i] = 0; + } + } + + for (n2=0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE) + { + for (n1=0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE) + { + forward4x4(PBlock, PBlock, n2, n1); + } + } + + // 2X2 transform of DC coeffs. + mp1[0] = (PBlock[0][0] + PBlock[4][0] + PBlock[0][4] + PBlock[4][4]); + mp1[1] = (PBlock[0][0] - PBlock[4][0] + PBlock[0][4] - PBlock[4][4]); + mp1[2] = (PBlock[0][0] + PBlock[4][0] - PBlock[0][4] - PBlock[4][4]); + mp1[3] = (PBlock[0][0] - PBlock[4][0] - PBlock[0][4] + PBlock[4][4]); + + if (p_Vid->sp_switch || currSlice->slice_type == SI_SLICE) + { + for (n2=0; n2 < 2; ++n2 ) + { + for (n1=0; n1 < 2; ++n1 ) + { + //quantization fo predicted block + ilev = rshift_rnd_sf(iabs (mp1[n1+n2*2]) * quant_coef[qp_rem_sp][0][0], q_bits_sp + 1); + //addition + ilev = isignab(ilev, mp1[n1+n2*2]) + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2][n1]][0][0]; + //dequantization + mp1[n1+n2*2] =ilev * dequant_coef[qp_rem_sp][0][0] << qp_per_sp; + } + } + + for (n2 = 0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE) + { + for (n1 = 0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE) + { + for (j = 0; j < BLOCK_SIZE; ++j) + { + for (i = 0; i < BLOCK_SIZE; ++i) + { + // recovering coefficient since they are already dequantized earlier + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = (currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] >> qp_per) / dequant_coef[qp_rem][j][i]; + + //quantization of the predicted block + ilev = rshift_rnd_sf(iabs(PBlock[n2 + j][n1 + i]) * quant_coef[qp_rem_sp][j][i], q_bits_sp); + //addition of the residual + ilev = isignab(ilev,PBlock[n2 + j][n1 + i]) + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] ; + // Inverse quantization + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = ilev * dequant_coef[qp_rem_sp][j][i] << qp_per_sp; + } + } + } + } + } + else + { + for (n2=0; n2 < 2; ++n2 ) + { + for (n1=0; n1 < 2; ++n1 ) + { + ilev = mp1[n1+n2*2] + (((currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2][n1]][0][0] * dequant_coef[qp_rem][0][0] * A[0][0]) << qp_per) >> 5); + ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][0][0], q_bits_sp + 1); + //ilev = isignab(rshift_rnd_sf(iabs(ilev)* quant_coef[qp_rem_sp][0][0], q_bits_sp + 1), ilev); + mp1[n1+n2*2] = ilev * dequant_coef[qp_rem_sp][0][0] << qp_per_sp; + } + } + + for (n2 = 0; n2 < p_Vid->mb_cr_size_y; n2 += BLOCK_SIZE) + { + for (n1 = 0; n1 < p_Vid->mb_cr_size_x; n1 += BLOCK_SIZE) + { + for (j = 0; j< BLOCK_SIZE; ++j) + { + for (i = 0; i< BLOCK_SIZE; ++i) + { + // recovering coefficient since they are already dequantized earlier + icof = (currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] >> qp_per) / dequant_coef[qp_rem][j][i]; + //dequantization and addition of the predicted block + ilev = PBlock[n2 + j][n1 + i] + ((icof * dequant_coef[qp_rem][j][i] * A[j][i] << qp_per) >> 6); + //quantization and dequantization + ilev = isign(ilev) * rshift_rnd_sf(iabs(ilev) * quant_coef[qp_rem_sp][j][i], q_bits_sp); + currSlice->cof4[uv + 1][cof4_pos_to_subblock[n2>>2][n1>>2]][j][i] = ilev * dequant_coef[qp_rem_sp][j][i] << qp_per_sp; + } + } + } + } + } + + currSlice->cof4[uv + 1][0][0][0] = (mp1[0] + mp1[1] + mp1[2] + mp1[3]) >> 1; + currSlice->cof4[uv + 1][1][0][0] = (mp1[0] + mp1[1] - mp1[2] - mp1[3]) >> 1; + currSlice->cof4[uv + 1][2][0][0] = (mp1[0] - mp1[1] + mp1[2] - mp1[3]) >> 1; + currSlice->cof4[uv + 1][3][0][0] = (mp1[0] - mp1[1] - mp1[2] + mp1[3]) >> 1; + + free_mem2Dint(PBlock); +} + +#if defined(_DEBUG) || !defined(_M_IX86) +void iMBtrans4x4(Macroblock *currMB, ColorPlane pl, int smb) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + + StorablePicture *dec_picture = p_Vid->dec_picture; + + VideoImage *curr_img = pl ? dec_picture->imgUV[pl - 1]: dec_picture->imgY; + + // =============== 4x4 itrans ================ + // ------------------------------------------- + if (smb) + { + h264_short_block_t *blocks = currSlice->cof4[pl]; + const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl]; + + itrans_sp(blocks[0], mb_pred, currMB, pl, 0, 0); + itrans_sp(blocks[1], mb_pred, currMB, pl, 4, 0); + itrans_sp(blocks[2], mb_pred, currMB, pl, 0, 4); + itrans_sp(blocks[3], mb_pred, currMB, pl, 4, 4); + itrans_sp(blocks[4], mb_pred, currMB, pl, 8, 0); + itrans_sp(blocks[5], mb_pred, currMB, pl, 12, 0); + itrans_sp(blocks[6], mb_pred, currMB, pl, 8, 4); + itrans_sp(blocks[7], mb_pred, currMB, pl, 12, 4); + itrans_sp(blocks[8], mb_pred, currMB, pl, 0, 8); + itrans_sp(blocks[9], mb_pred, currMB, pl, 4, 8); + itrans_sp(blocks[10], mb_pred, currMB, pl, 0, 12); + itrans_sp(blocks[11], mb_pred, currMB, pl, 4, 12); + itrans_sp(blocks[12], mb_pred, currMB, pl, 8, 8); + itrans_sp(blocks[13], mb_pred, currMB, pl, 12, 8); + itrans_sp(blocks[14], mb_pred, currMB, pl, 8, 12); + itrans_sp(blocks[15], mb_pred, currMB, pl, 12, 12); + } + else if (currMB->is_lossless) + { + Inv_Residual_trans_4x4(currMB, pl, 0, 0); + Inv_Residual_trans_4x4(currMB, pl, 4, 0); + Inv_Residual_trans_4x4(currMB, pl, 0, 4); + Inv_Residual_trans_4x4(currMB, pl, 4, 4); + Inv_Residual_trans_4x4(currMB, pl, 8, 0); + Inv_Residual_trans_4x4(currMB, pl, 12, 0); + Inv_Residual_trans_4x4(currMB, pl, 8, 4); + Inv_Residual_trans_4x4(currMB, pl, 12, 4); + Inv_Residual_trans_4x4(currMB, pl, 0, 8); + Inv_Residual_trans_4x4(currMB, pl, 4, 8); + Inv_Residual_trans_4x4(currMB, pl, 0, 12); + Inv_Residual_trans_4x4(currMB, pl, 4, 12); + Inv_Residual_trans_4x4(currMB, pl, 8, 8); + Inv_Residual_trans_4x4(currMB, pl, 12, 8); + Inv_Residual_trans_4x4(currMB, pl, 8, 12); + Inv_Residual_trans_4x4(currMB, pl, 12, 12); + } + else + { + const h264_short_block_t *blocks = currSlice->cof4[pl]; + const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl]; + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + opt_itrans4x4(blocks[4], mb_pred, mb_rec, 8, 0); + opt_itrans4x4(blocks[5], mb_pred, mb_rec, 12, 0); + opt_itrans4x4(blocks[6], mb_pred, mb_rec, 8, 4); + opt_itrans4x4(blocks[7], mb_pred, mb_rec, 12, 4); + opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8); + opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8); + opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12); + opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12); + opt_itrans4x4(blocks[12], mb_pred, mb_rec, 8, 8); + opt_itrans4x4(blocks[13], mb_pred, mb_rec, 12, 8); + opt_itrans4x4(blocks[14], mb_pred, mb_rec, 8, 12); + opt_itrans4x4(blocks[15], mb_pred, mb_rec, 12, 12); + } + + // construct picture from 4x4 blocks + opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, currSlice->mb_rec[pl]); +} +#endif +void iMBtrans8x8(Macroblock *currMB, ColorPlane pl) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + VideoImage *curr_img = pl ? dec_picture->imgUV[pl - 1] : dec_picture->imgY; + + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + h264_imgpel_macroblock_row_t *mb_pred = currSlice->mb_pred[pl]; + h264_short_8x8block_t *mb_rres8 = currSlice->mb_rres8[pl]; + + if (currMB->is_lossless == FALSE) + { + opt_itrans8x8(mb_rec, mb_pred, mb_rres8[0], 0); + opt_itrans8x8(mb_rec, mb_pred, mb_rres8[1], 8); + opt_itrans8x8(mb_rec+8, mb_pred+8, mb_rres8[2], 0); + opt_itrans8x8(mb_rec+8, mb_pred+8, mb_rres8[3], 8); + } + else + { + itrans8x8_lossless(mb_rec, mb_pred, mb_rres8[0], 0); + itrans8x8_lossless(mb_rec, mb_pred, mb_rres8[1], 8); + itrans8x8_lossless(mb_rec+8, mb_pred+8, mb_rres8[2], 0); + itrans8x8_lossless(mb_rec+8, mb_pred+8, mb_rres8[3], 8); + } + + opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, mb_rec); +} + +void iTransform(Macroblock *currMB, ColorPlane pl, int smb) +{ + Slice *currSlice = currMB->p_Slice; + VideoParameters *p_Vid = currMB->p_Vid; + StorablePicture *dec_picture = p_Vid->dec_picture; + + int uv = pl-1; + + if ((currMB->cbp & 15) != 0 || smb) + { + if(currMB->luma_transform_size_8x8_flag == 0) // 4x4 inverse transform + { + iMBtrans4x4(currMB, pl, smb); + } + else // 8x8 inverse transform + { + iMBtrans8x8(currMB, pl); + } + } + else + { + VideoImage *curr_img = pl ? dec_picture->imgUV[uv] : dec_picture->imgY; + opt_copy_image_data_16x16_stride(curr_img, currMB->pix_x, currMB->pix_y, currSlice->mb_pred[pl]); + } +// TODO: fix 4x4 lossless + if (dec_picture->chroma_format_idc == YUV420) + { + VideoImage *curUV; + + for(uv=0;uv<2;++uv) + { + int pl = uv + 1; + + const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl]; + + // =============== 4x4 itrans ================ + // ------------------------------------------- + curUV = dec_picture->imgUV[uv]; + + if (!smb && (currMB->cbp>>4)) + { + if (currMB->is_lossless == FALSE) + { + const h264_short_block_t *blocks = currSlice->cof4[pl]; + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec); + } + else + { // lossless + const h264_short_block_t *blocks = currSlice->cof4[pl]; + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + + itrans4x4_ls(blocks[0], mb_pred, mb_rec, 0, 0); + itrans4x4_ls(blocks[1], mb_pred, mb_rec, 4, 0); + itrans4x4_ls(blocks[2], mb_pred, mb_rec, 0, 4); + itrans4x4_ls(blocks[3], mb_pred, mb_rec, 4, 4); + copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec); + } + } + else if (smb) + { + const h264_short_block_t *blocks = currSlice->cof4[pl]; + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + + itrans_sp_cr(currMB, uv); + + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + + copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec); + } + else + { + copy_image_data_8x8_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_pred); + } + } + } + else if (dec_picture->chroma_format_idc == YUV422) + { + VideoImage *curUV; + + for(uv=0;uv<2;++uv) + { + // =============== 4x4 itrans ================ + // ------------------------------------------- + int pl = uv + 1; + const h264_imgpel_macroblock_row_t *mb_pred=currSlice->mb_pred[pl]; + curUV = dec_picture->imgUV[uv]; + + if (!smb && (currMB->cbp>>4)) + { + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + const h264_short_block_t *blocks = currSlice->cof4[pl]; + + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8); + opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8); + opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12); + opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12); + + copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec, 8, 16); + } + else if (smb) + { + const h264_short_block_t *blocks = currSlice->cof4[pl]; + h264_imgpel_macroblock_row_t *mb_rec = currSlice->mb_rec[pl]; + + itrans_sp_cr(currMB, uv); + + opt_itrans4x4(blocks[0], mb_pred, mb_rec, 0, 0); + opt_itrans4x4(blocks[1], mb_pred, mb_rec, 4, 0); + opt_itrans4x4(blocks[2], mb_pred, mb_rec, 0, 4); + opt_itrans4x4(blocks[3], mb_pred, mb_rec, 4, 4); + opt_itrans4x4(blocks[8], mb_pred, mb_rec, 0, 8); + opt_itrans4x4(blocks[9], mb_pred, mb_rec, 4, 8); + opt_itrans4x4(blocks[10], mb_pred, mb_rec, 0, 12); + opt_itrans4x4(blocks[11], mb_pred, mb_rec, 4, 12); + + copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_rec, 8, 16); + } + else + { + copy_image_data_stride(curUV,currMB->pix_c_x, currMB->pix_c_y, mb_pred, 8, 16); + } + } + } +} + +/*! + ************************************************************************************* + * \brief + * Copy ImgPel Data from one structure to another (16x16) + ************************************************************************************* + */ +void copy_image_data_16x16(imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x) +{ + int j; + for(j=0; j<MB_BLOCK_SIZE; ++j) + { + memcpy(&imgBuf1[j][dest_x], &imgBuf2[j][src_x], MB_BLOCK_SIZE * sizeof (imgpel)); + } +} + +/*! + ************************************************************************************* + * \brief + * Copy ImgPel Data from one structure to another (16x16) + ************************************************************************************* + */ +#ifdef _M_IX86 +void copy_image_data_16x16_stride_sse(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source) +{ + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + _asm + { + mov eax, dest + mov ecx, destination_stride + mov edx, source; + movaps xmm0, 0[edx] + movaps xmm1, 16[edx] + movaps xmm2, 32[edx] + movaps xmm3, 64[edx] + movups [eax], xmm0 // dest[0] + movups [eax+ecx], xmm1 // dest[1] + movups [eax+2*ecx], xmm2 // dest[2] + movups [eax+4*ecx], xmm3 // dest[4] + + movaps xmm0, 48[edx] + movaps xmm1, 96[edx] + lea eax, [eax+2*ecx] // dest = &dest[2] + movups [eax+ecx], xmm0 // dest[3] + movups [eax+4*ecx], xmm1 // dest[6] + + movaps xmm0, 80[edx] + movaps xmm1, 128[edx] + lea eax, [eax+2*ecx] // dest = &dest[2] (dest[4] from start) + movups [eax+ecx], xmm0 // dest[5] + movups [eax+4*ecx], xmm1 // dest[8] + + movaps xmm0, 112[edx] + movaps xmm1, 160[edx] + lea eax, [eax+2*ecx] // dest = &dest[2] (dest[6] from start) + movups [eax+ecx], xmm0 // dest[7] + movups [eax+4*ecx], xmm1 // dest[10] + + movaps xmm0, 144[edx] + movaps xmm1, 192[edx] + lea eax, [eax+2*ecx] // dest = &dest[2] (dest[8] from start) + movups [eax+ecx], xmm0 // dest[9] + movups [eax+4*ecx], xmm1 // dest[12] + + movaps xmm0, 176[edx] + movaps xmm1, 224[edx] + lea eax, [eax+2*ecx] // dest = &dest[2] (dest[10] from start) + movups [eax+ecx], xmm0 // dest[11] + movups [eax+4*ecx], xmm1 // dest[14] + + movaps xmm0, 208[edx] + movaps xmm1, 240[edx] + lea eax, [eax+ecx] // dest = &dest[1] (dest[11] from start) + movups [eax+2*ecx], xmm0 // dest[13] + movups [eax+4*ecx], xmm1 // dest[15] + } +} +#endif + +void copy_image_data_16x16_stride_c(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source) +{ + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + + int j; + for(j=0; j<MB_BLOCK_SIZE; j++) + { + memcpy(dest, source[j], MB_BLOCK_SIZE * sizeof (imgpel)); + dest+=destination_stride; + } +} + +/*! + ************************************************************************************* + * \brief + * Copy ImgPel Data from one structure to another (8x8) + ************************************************************************************* + */ +void copy_image_data_8x8_stride2(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2, int src_x, int src_y) +{ +#ifdef _M_IX86 + ptrdiff_t destination_stride = destination->stride; + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + _asm + { + mov eax, src_y + shl eax, 4 + add eax, src_x + add eax, imgBuf2 + + mov edx, dest + mov ecx, destination_stride + + movq mm0, MMWORD PTR 0[eax] + movq mm1, MMWORD PTR 16[eax] + movq mm2, MMWORD PTR 32[eax] + movq mm3, MMWORD PTR 48[eax] + movq mm4, MMWORD PTR 64[eax] + movq mm5, MMWORD PTR 80[eax] + movq mm6, MMWORD PTR 96[eax] + movq mm7, MMWORD PTR 112[eax] + + movntq [edx], mm0 + movntq [edx+ecx], mm1 + movntq [edx+2*ecx], mm2 + movntq [edx+4*ecx], mm4 + add edx, ecx + movntq 0[edx+2*ecx], mm3 + movntq 0[edx+4*ecx], mm5 + add edx, ecx + movntq 0[edx+4*ecx], mm6 + add edx, ecx + movntq 0[edx+4*ecx], mm7 + } +#else + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + int j; + for(j = 0; j < BLOCK_SIZE_8x8; ++j) + { + memcpy(dest, &imgBuf2[src_y+j][src_x], BLOCK_SIZE_8x8 * sizeof (imgpel)); + dest+=destination_stride; + } +#endif + +} + +void copy_image_data_8x8_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2) +{ +#ifdef _M_IX86 + ptrdiff_t destination_stride = destination->stride; + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + _asm + { + mov eax, imgBuf2 + mov edx, dest + mov ecx, destination_stride + + movq mm0, MMWORD PTR 0[eax] + movq mm1, MMWORD PTR 16[eax] + movq mm2, MMWORD PTR 32[eax] + movq mm3, MMWORD PTR 48[eax] + movq mm4, MMWORD PTR 64[eax] + movq mm5, MMWORD PTR 80[eax] + movq mm6, MMWORD PTR 96[eax] + movq mm7, MMWORD PTR 112[eax] + + movntq [edx], mm0 + movntq [edx+ecx], mm1 + movntq [edx+2*ecx], mm2 + movntq [edx+4*ecx], mm4 + add edx, ecx + movntq 0[edx+2*ecx], mm3 + movntq 0[edx+4*ecx], mm5 + add edx, ecx + movntq 0[edx+4*ecx], mm6 + add edx, ecx + movntq 0[edx+4*ecx], mm7 + } +#else + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + int j; + for(j = 0; j < BLOCK_SIZE_8x8; ++j) + { + memcpy(dest, &imgBuf2[j][0], BLOCK_SIZE_8x8 * sizeof (imgpel)); + dest+=destination_stride; + } +#endif +} + +/*! + ************************************************************************************* + * \brief + * Copy ImgPel Data from one structure to another (4x4) + ************************************************************************************* + */ + +void copy_image_data_4x4_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t source, int src_x, int src_y) +{ + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + h264_imgpel_macroblock_row_t *src = (h264_imgpel_macroblock_row_t *)source[src_y]; /* cast is for const */ + + int j; + for(j = 0; j < BLOCK_SIZE; ++j) + { + memcpy(dest, &src[j][src_x], BLOCK_SIZE * sizeof (imgpel)); + dest+=destination_stride; + } +} + +/*! + ************************************************************************************* + * \brief + * Copy ImgPel Data from one structure to another (8x8) + ************************************************************************************* + */ +void copy_image_data(imgpel **imgBuf1, imgpel **imgBuf2, int dest_x, int src_x, int width, int height) +{ + int j; + for(j = 0; j < height; ++j) + { + memcpy(&imgBuf1[j][dest_x], &imgBuf2[j][src_x], width * sizeof (imgpel)); + } +} + +void copy_image_data_stride(VideoImage *destination, int dest_x, int dest_y, const h264_imgpel_macroblock_t imgBuf2, int width, int height) +{ + ptrdiff_t destination_stride = destination->stride; // in case the compiler doesn't optimize this + imgpel *dest = destination->base_address + destination_stride * dest_y + dest_x; + #ifdef H264_IPP + IppiSize roi = {width,height}; + ippiCopy_8u_C1R(imgBuf2[0], sizeof(imgBuf2[0]), dest, destination_stride, roi); +#else + int j; + for(j = 0; j < height; ++j) + { + memcpy(dest, imgBuf2[j], width * sizeof (imgpel)); + dest+=destination_stride; + } +#endif +} |