1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
|
/*!
************************************************************************
* \file
* global.h
* \brief
* global definitions for H.264 decoder.
* \author
* Copyright (C) 1999 Telenor Satellite Services,Norway
* Ericsson Radio Systems, Sweden
*
* Inge Lille-Langoy <inge.lille-langoy@telenor.com>
*
* Telenor Satellite Services
* Keysers gt.13 tel.: +47 23 13 86 98
* N-0130 Oslo,Norway fax.: +47 22 77 79 80
*
* Rickard Sjoberg <rickard.sjoberg@era.ericsson.se>
*
* Ericsson Radio Systems
* KI/ERA/T/VV
* 164 80 Stockholm, Sweden
*
************************************************************************
*/
#ifndef _GLOBAL_H_
#define _GLOBAL_H_
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <sys/timeb.h>
#include <bfc/platform/types.h>
#include "win32.h"
#include "defines.h"
#include "ifunctions.h"
#include "parsetcommon.h"
#include "types.h"
#include "frame.h"
#include "nalucommon.h"
#include "memcache.h"
#include <mmintrin.h>
#ifdef H264_IPP
//#include "../tools/staticlib/ipp_px.h"
#include "ippdefs.h"
#include "ippcore.h"
#include "ipps.h"
#include "ippi.h"
#include "ippvc.h"
#endif
/* benski> not the best place for this but it works for now */
#ifdef _M_IX86
// must be a multiple of 16
#pragma warning(disable: 4799)
/*!
 * Zero a buffer with ordinary (cached) MMX stores, 32 bytes per loop iteration.
 *
 * \param dst  start of the buffer to clear
 * \param i    byte count. 32 is subtracted after every 32-byte store group and
 *             the loop repeats while the result is > 0 (signed test, jg), so at
 *             least 32 bytes are always written and a count that is not a
 *             multiple of 32 is rounded up to the next multiple.
 *
 * Uses mm0 and does not execute emms; presumably callers finish with
 * memset_fast_end() before any x87 floating-point code runs -- confirm at the
 * call sites.
 */
static inline void memzero_cache32(void *dst, unsigned long i)
{
__asm {
// mm0 = 0, edi = write cursor
pxor mm0, mm0
mov edi, dst
loopwrite:
movq 0[edi], mm0
movq 8[edi], mm0
movq 16[edi], mm0
movq 24[edi], mm0
lea edi, [edi+32]
// the counter is the parameter itself, decremented in place
sub i, 32
jg loopwrite
}
}
/*!
 * Zero a buffer with non-temporal MMX stores (movntq), 32 bytes per loop
 * iteration.
 *
 * \param dst  start of the buffer to clear
 * \param i    byte count. 32 is subtracted after every 32-byte store group and
 *             the loop repeats while the result is > 0 (signed test, jg), so at
 *             least 32 bytes are always written and a count that is not a
 *             multiple of 32 is rounded up to the next multiple.
 *
 * No sfence and no emms is executed here. Presumably callers finish with
 * memset_fast_end() -- confirm at the call sites.
 */
static inline void memzero_fast32(void *dst, unsigned long i)
{
__asm {
// mm0 = 0, edi = write cursor
pxor mm0, mm0
mov edi, dst
loopwrite:
movntq 0[edi], mm0
movntq 8[edi], mm0
movntq 16[edi], mm0
movntq 24[edi], mm0
lea edi, [edi+32]
// the counter is the parameter itself, decremented in place
sub i, 32
jg loopwrite
}
}
/*!
 * Zero exactly 64 bytes at dst using eight unrolled 8-byte MMX stores.
 *
 * \param dst  start of the 64-byte region to clear
 *
 * Uses mm0 and does not execute emms; presumably paired with
 * memset_fast_end() by the caller -- confirm at the call sites.
 */
static inline void memzero64(void *dst)
{
__asm {
pxor mm0, mm0
mov edi, dst
movq 0[edi], mm0
movq 8[edi], mm0
movq 16[edi], mm0
movq 24[edi], mm0
movq 32[edi], mm0
movq 40[edi], mm0
movq 48[edi], mm0
movq 56[edi], mm0
}
}
/*!
 * Zero exactly 128 bytes at dst using sixteen unrolled 8-byte MMX stores.
 *
 * \param dst  start of the 128-byte region to clear
 *
 * Uses mm0 and does not execute emms; presumably paired with
 * memset_fast_end() by the caller -- confirm at the call sites.
 */
static inline void memzero128(void *dst)
{
__asm {
pxor mm0, mm0
mov edi, dst
movq 0[edi], mm0
movq 8[edi], mm0
movq 16[edi], mm0
movq 24[edi], mm0
movq 32[edi], mm0
movq 40[edi], mm0
movq 48[edi], mm0
movq 56[edi], mm0
movq 64[edi], mm0
movq 72[edi], mm0
movq 80[edi], mm0
movq 88[edi], mm0
movq 96[edi], mm0
movq 104[edi], mm0
movq 112[edi], mm0
movq 120[edi], mm0
}
}
/*!
 * Zero exactly 24 bytes at dst using three 8-byte MMX stores.
 *
 * \param dst  start of the 24-byte region to clear
 *
 * Uses mm0 and does not execute emms; presumably paired with
 * memset_fast_end() by the caller -- confirm at the call sites.
 */
static inline void memzero24(void *dst)
{
__asm {
pxor mm0, mm0
mov edi, dst
movq 0[edi], mm0
movq 8[edi], mm0
movq 16[edi], mm0
}
}
/*!
 * Zero exactly 48 bytes at dst using six 8-byte MMX stores.
 *
 * \param dst  start of the 48-byte region to clear
 *
 * Uses mm0 and does not execute emms; presumably paired with
 * memset_fast_end() by the caller -- confirm at the call sites.
 */
static inline void memzero48(void *dst)
{
__asm {
pxor mm0, mm0
mov edi, dst
movq 0[edi], mm0
movq 8[edi], mm0
movq 16[edi], mm0
movq 24[edi], mm0
movq 32[edi], mm0
movq 40[edi], mm0
}
}
/*!
 * Zero exactly 16 bytes at dst using two 8-byte MMX stores.
 *
 * \param dst  start of the 16-byte region to clear
 *
 * Uses mm0 and does not execute emms; presumably paired with
 * memset_fast_end() by the caller -- confirm at the call sites.
 */
static inline void memzero16(void *dst)
{
__asm {
pxor mm0, mm0
mov edi, dst
movq 0[edi], mm0
movq 8[edi], mm0
}
}
/*!
 * Zero exactly 8 bytes at dst using a single MMX store.
 *
 * \param dst  start of the 8-byte region to clear
 *
 * Uses mm0 and does not execute emms; presumably paired with
 * memset_fast_end() by the caller -- confirm at the call sites.
 */
static inline void memzero8(void *dst)
{
__asm {
pxor mm0, mm0
mov edi, dst
movq 0[edi], mm0
}
}
/*!
 * Ends a run of MMX-based memzero calls by invoking _mm_empty(), the
 * intrinsic for the EMMS instruction, which releases the MMX state.
 *
 * The memzero helpers in this branch use mm0 and never execute emms
 * themselves, so this is the only place the MMX state gets cleared.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and stray arguments are rejected at compile time.
 */
static inline void memset_fast_end(void)
{
	_mm_empty();
}
// Very optimized memcpy() routine for all AMD Athlon and Duron family.
// This code uses any of FOUR different basic copy methods, depending
// on the transfer size.
// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
// "Streaming Store"), and also uses the software prefetchnta instructions,
// be sure you're running on Athlon/Duron or other recent CPU before calling!
//
// NOTE: the macros below are expanded inside memcpy_amd's __asm block. The
// values are not parenthesized; the only arithmetic applied to them there is
// "/64", "/ 2" and "* 64", which still evaluates as intended (for example
// IN_CACHE_COPY/64 expands to 64 * 1024/64 = 1024).
//
// Method 1 (below TINY_BLOCK_COPY bytes):
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop".
#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop".
#define IN_CACHE_COPY 64 * 1024 // upper limit for movq/movq copy w/SW prefetch
// Next is a copy that uses the MMX registers to copy 8 bytes at a time,
// also using the "unrolled loop" optimization. This code uses
// the software prefetch instruction to get the data into the cache.
#define UNCACHED_COPY 197 * 1024 // upper limit for movq/movntq w/SW prefetch
// For larger blocks, which will spill beyond the cache, it's faster to
// use the Streaming Store instruction MOVNTQ. This write instruction
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
// USE 64 * 1024 FOR THIS VALUE IF YOU'RE ALWAYS FILLING A "CLEAN CACHE"
// NOTE(review): BLOCK_PREFETCH_COPY is not referenced by memcpy_amd; it
// appears to be documentation only ("infinity" is not a defined symbol here).
#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
// 80h is an assembler-style hex literal (128); it is only meaningful where the
// macro is expanded inside an __asm block.
#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.
// Inline assembly syntax for use with Visual C++
/*!
 * memcpy() replacement written in Visual C++ inline assembly (32-bit x86,
 * MMX plus movntq/prefetchnta/sfence).
 *
 * \param dest  destination buffer
 * \param src   source buffer
 * \param n     number of bytes to copy
 * \return      dest. There is no C return statement: the value is left in
 *              eax by the last instruction of the __asm block.
 *
 * Copy method by size:
 *  - n < TINY_BLOCK_COPY: straight to the movsd table plus "rep movsb" tail.
 *  - otherwise the destination is first aligned to 8 bytes with up to 7 movsb,
 *    except when 32K < n <= 64K, where alignment is skipped.
 *  - 64-byte block count < IN_CACHE_COPY/64: movq load / movq store loop.
 *  - block count < UNCACHED_COPY/64: movq load / movntq store loop.
 *  - larger: block prefetch in chunks of CACHEBLOCK cache lines with movntq
 *    stores; what is left over goes through the movq/movntq loop.
 *  - every path ends with the movsd table (remaining dwords) and "rep movsb"
 *    (remaining 1-3 bytes), then sfence.
 *
 * The two computed jumps ("jmp ecx") land ecx bytes before a label, inside a
 * run of movsb or movsd instructions; this relies on each of those
 * instructions assembling to exactly one byte.
 *
 * The emms at the end is commented out, so MMX state is left in use after any
 * path that touched the mm registers. Presumably the caller is expected to
 * clear it (e.g. via memset_fast_end()) -- confirm at the call sites.
 */
static void * memcpy_amd(void *dest, const void *src, size_t n)
{
__asm {
mov ecx, [n] // number of bytes to copy
mov edi, [dest] // destination
mov esi, [src] // source
mov ebx, ecx // keep a copy of count
cld
cmp ecx, TINY_BLOCK_COPY
jb $memcpy_ic_3 // tiny? skip mmx copy
cmp ecx, 32*1024 // dont align between 32k-64k because
jbe $memcpy_do_align // it appears to be slower
cmp ecx, 64*1024
jbe $memcpy_align_done
$memcpy_do_align:
mov ecx, 8 // a trick thats faster than rep movsb...
sub ecx, edi // align destination to qword
and ecx, 111b // get the low bits
sub ebx, ecx // update copy count
neg ecx // set up to jump into the array
add ecx, offset $memcpy_align_done
jmp ecx // jump to array of movsbs
align 4
movsb
movsb
movsb
movsb
movsb
movsb
movsb
movsb
$memcpy_align_done: // destination is qword aligned here if the align step above ran
mov ecx, ebx // number of bytes left to copy
shr ecx, 6 // get 64-byte block count
jz $memcpy_ic_2 // finish the last few bytes
cmp ecx, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
jae $memcpy_uc_test
// This is small block copy that uses the MMX registers to copy 8 bytes
// at a time. It uses the "unrolled loop" optimization, and also uses
// the software prefetch instruction to get the data into the cache.
align 16
$memcpy_ic_1: // 64-byte block copies, in-cache copy
prefetchnta [esi + (200*64/34+192)] // start reading ahead
movq mm0, [esi+0] // read 64 bits
movq mm1, [esi+8]
movq [edi+0], mm0 // write 64 bits
movq [edi+8], mm1 // note: the normal movq writes the
movq mm2, [esi+16] // data to cache -- a cache line will be
movq mm3, [esi+24] // allocated as needed, to store the data
movq [edi+16], mm2
movq [edi+24], mm3
movq mm0, [esi+32]
movq mm1, [esi+40]
movq [edi+32], mm0
movq [edi+40], mm1
movq mm2, [esi+48]
movq mm3, [esi+56]
movq [edi+48], mm2
movq [edi+56], mm3
add esi, 64 // update source pointer
add edi, 64 // update destination pointer
dec ecx // count down
jnz $memcpy_ic_1 // last 64-byte block?
$memcpy_ic_2:
mov ecx, ebx // has valid low 6 bits of the byte count
$memcpy_ic_3:
shr ecx, 2 // dword count
and ecx, 1111b // only look at the "remainder" bits
neg ecx // set up to jump into the array
add ecx, offset $memcpy_last_few
jmp ecx // jump to array of movsds
$memcpy_uc_test:
cmp ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy
jae $memcpy_bp_1
$memcpy_64_test:
or ecx, ecx // _tail end of block prefetch will jump here
jz $memcpy_ic_2 // no more 64-byte blocks left
// For larger blocks, which will spill beyond the cache, its faster to
// use the Streaming Store instruction MOVNTQ. This write instruction
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
align 16
$memcpy_uc_1: // 64-byte blocks, uncached copy
prefetchnta [esi + (200*64/34+192)] // start reading ahead
movq mm0,[esi+0] // read 64 bits
add edi,64 // update destination pointer
movq mm1,[esi+8]
add esi,64 // update source pointer
movq mm2,[esi-48]
movntq [edi-64], mm0 // write 64 bits, bypassing the cache
movq mm0,[esi-40] // note: movntq also prevents the CPU
movntq [edi-56], mm1 // from READING the destination address
movq mm1,[esi-32] // into the cache, only to be over-written
movntq [edi-48], mm2 // so that also helps performance
movq mm2,[esi-24]
movntq [edi-40], mm0
movq mm0,[esi-16]
movntq [edi-32], mm1
movq mm1,[esi-8]
movntq [edi-24], mm2
movntq [edi-16], mm0
dec ecx
movntq [edi-8], mm1
jnz $memcpy_uc_1 // last 64-byte block?
jmp $memcpy_ic_2 // almost done
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch, in this case.
// The technique is great for getting maximum read bandwidth,
// especially in DDR memory systems.
$memcpy_bp_1: // large blocks, block prefetch copy
cmp ecx, CACHEBLOCK // big enough to run another prefetch loop?
jl $memcpy_64_test // no, back to regular uncached copy
mov eax, CACHEBLOCK / 2 // block prefetch loop, unrolled 2X
add esi, CACHEBLOCK * 64 // move to the top of the block
align 16
$memcpy_bp_2:
mov edx, [esi-64] // grab one address per cache line
mov edx, [esi-128] // grab one address per cache line
sub esi, 128 // go reverse order
dec eax // count down the cache lines
jnz $memcpy_bp_2 // keep grabbing more lines into cache
mov eax, CACHEBLOCK // now that its in cache, do the copy
align 16
$memcpy_bp_3:
movq mm0, [esi ] // read 64 bits
movq mm1, [esi+ 8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
add esi, 64 // update source pointer
movntq [edi ], mm0 // write 64 bits, bypassing cache
movntq [edi+ 8], mm1 // note: movntq also prevents the CPU
movntq [edi+16], mm2 // from READING the destination address
movntq [edi+24], mm3 // into the cache, only to be over-written,
movntq [edi+32], mm4 // so that also helps performance
movntq [edi+40], mm5
movntq [edi+48], mm6
movntq [edi+56], mm7
add edi, 64 // update dest pointer
dec eax // count down
jnz $memcpy_bp_3 // keep copying
sub ecx, CACHEBLOCK // update the 64-byte block count
jmp $memcpy_bp_1 // keep processing chunks
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop". Then it handles the last few bytes.
// Sixteen movsd follow; the computed jump at $memcpy_ic_3 enters this run
// 0-15 instructions before $memcpy_last_few.
align 4
movsd
movsd // perform last 1-15 dword copies
movsd
movsd
movsd
movsd
movsd
movsd
movsd
movsd // perform last 1-7 dword copies
movsd
movsd
movsd
movsd
movsd
movsd
$memcpy_last_few: // dword aligned from before movsds
mov ecx, ebx // has valid low 2 bits of the byte count
and ecx, 11b // the last few cows must come home
jz $memcpy_final // no more, lets leave
rep movsb // the last 1, 2, or 3 bytes
$memcpy_final:
// emms // clean up the MMX state
sfence // flush the write buffer
mov eax, [dest] // ret value = destination pointer
}
}
#elif defined(_M_X64)
/*!
 * Zero exactly 24 bytes starting at dst (plain C version for x64 builds).
 *
 * \param dst  start of the 24-byte region to clear; no alignment is required
 *
 * The previous loop indexed an int32_t pointer with a byte counter
 * (d[0], d[4], ... d[20]). It cleared only six scattered 32-bit words and
 * wrote as far as byte offset 83, well past the 24 bytes this function is
 * meant to touch. memset clears exactly the intended range.
 */
static inline void memzero24(void *dst)
{
	memset(dst, 0, 24);
}
/*!
 * No-op: the memzero helper in this branch is plain C and uses no MMX
 * registers, so there is no MMX state to release. Kept so callers can invoke
 * memset_fast_end() unconditionally on every platform.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and stray arguments are rejected at compile time.
 */
static inline void memset_fast_end(void)
{
}
#else
/*!
 * Zero i bytes starting at dst (plain C fallback for non-x86 builds).
 *
 * \param dst  start of the buffer to clear; no alignment is required
 * \param i    number of bytes to clear; 0 clears nothing
 *
 * The previous loop indexed an int32_t pointer with a byte counter
 * (d[0], d[4], d[8], ...), so it cleared only every fourth 32-bit word and
 * wrote up to roughly 4*i bytes past dst, beyond the end of the buffer. It
 * also compared a signed counter against the unsigned length. memset clears
 * exactly the requested range.
 */
static inline void memzero_fast16(void *dst, unsigned long i)
{
	memset(dst, 0, i);
}
/*!
 * Zero exactly 24 bytes starting at dst (plain C fallback for non-x86 builds).
 *
 * \param dst  start of the 24-byte region to clear; no alignment is required
 *
 * The previous loop indexed an int32_t pointer with a byte counter
 * (d[0], d[4], ... d[20]). It cleared only six scattered 32-bit words and
 * wrote as far as byte offset 83, well past the 24 bytes this function is
 * meant to touch. memset clears exactly the intended range.
 */
static inline void memzero24(void *dst)
{
	memset(dst, 0, 24);
}
/*!
 * No-op: the memzero helpers in this branch are plain C and use no MMX
 * registers, so there is no MMX state to release. Kept so callers can invoke
 * memset_fast_end() unconditionally on every platform.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and stray arguments are rejected at compile time.
 */
static inline void memset_fast_end(void)
{
}
#endif
//! Sentinel value with bit pattern 0x80000000, cast to int. Presumably marks an h264_ref_t that refers to no picture -- confirm at the use sites.
#define UNDEFINED_REFERENCE ((int)0x80000000)
//! 32-bit integer type used by the ref_pic_id / ref_id fields of the motion structs in this header
typedef int32_t h264_ref_t;
#define ET_SIZE 300 //!< size of error text buffer
extern char errortext[ET_SIZE]; //!< buffer for error message for exit with error()
//! CPU feature flags, declared here and defined elsewhere. Presumably non-zero when the named instruction set is usable -- TODO confirm where they are set.
extern int sse2_flag, mmx_flag, sse_flag, sse3_flag, sse4_1_flag;
/***********************************************************************
* T y p e d e f i n i t i o n s f o r J M
***********************************************************************
*/
/*!
 * Colour component selector.
 * The enumerators rely on implicit numbering starting at zero; the resulting
 * values are spelled out beside each name. Note that Cr comes before Cb here.
 */
typedef enum
{
  LumaComp, /*!< 0 */
  CrComp,   /*!< 1 */
  CbComp    /*!< 2 */
} Color_Component;
/***********************************************************************
* D a t a t y p e s f o r C A B A C
***********************************************************************
*/
//! Describes the location of a pixel/block position and which macroblock it falls in.
//! NOTE(review): field meanings below are inferred from the names only -- confirm against the code that fills this struct.
typedef struct pix_pos
{
int available; //!< presumably non-zero when the position may be used
int mb_addr; //!< presumably the address of the macroblock containing the position
short x; //!< presumably the position relative to that macroblock -- TODO confirm
short y;
short pos_x; //!< presumably the position in picture coordinates -- TODO confirm
short pos_y;
} PixelPos;
//! struct to characterize the state of the arithmetic coding engine
//! NOTE(review): per-field notes are inferred from names -- confirm against the CABAC decoder implementation.
typedef struct
{
unsigned int Drange; //!< presumably the current interval range
unsigned int Dvalue; //!< presumably the current code value
int DbitsLeft; //!< presumably the number of bits still buffered in Dvalue
byte *Dcodestrm; //!< pointer to the byte stream being decoded
int *Dcodestrm_len; //!< pointer to an int held elsewhere; presumably the read position/length in Dcodestrm
} DecodingEnvironment;
//! Convenience pointer alias for DecodingEnvironment
typedef DecodingEnvironment *DecodingEnvironmentPtr;
//! Motion vector stored as two shorts; presumably [0] = x and [1] = y -- TODO confirm against users
typedef short MotionVector[2];
//! definition of motion parameters
typedef struct pic_motion
{
h264_ref_t ref_pic_id; //!< reference picture identifier
h264_ref_t ref_id; //!< second reference identifier; how it differs from ref_pic_id is not visible here
MotionVector mv; //!< motion vector
char ref_idx; //!< reference index; its signedness follows plain char on the target compiler
} PicMotion;
// TODO: benski> might be more efficient to make a [list][subblock_y][subblock_x] array of these values instead of parallel arrays
//! Per-picture motion storage: one 2-D PicMotion array per list plus a 2-D byte array.
typedef struct motion_params
{
PicMotion **motion[2]; //!< one pointer-to-pointer array per list (two lists); presumably indexed [y][x] -- TODO confirm
byte ** moving_block; //!< presumably a per-block "moving" flag used by direct prediction -- TODO confirm
} MotionParams;
//! struct for context management
typedef struct
{
uint16_t state; // index into state-table CP
unsigned char MPS; // value (0/1) of the MPS, i.e. Most Probable Symbol going by the field name; the earlier "Least Probable Symbol" wording contradicted the name -- NOTE(review): confirm against the CABAC decoding code
unsigned char dummy; // for alignment
} BiContextType;
//! Convenience pointer alias for BiContextType
typedef BiContextType *BiContextTypePtr;
/**********************************************************************
* C O N T E X T S F O R T M L S Y N T A X E L E M E N T S
**********************************************************************
*/
// Number of context models per syntax element; these size the arrays in
// MotionInfoContexts below (NUM_TRANSFORM_SIZE_CTX is used by
// TextureInfoContexts instead).
#define NUM_MB_TYPE_CTX 11
#define NUM_B8_TYPE_CTX 9
#define NUM_MV_RES_CTX 10
#define NUM_REF_NO_CTX 6
#define NUM_DELTA_QP_CTX 4
#define NUM_MB_AFF_CTX 4
#define NUM_TRANSFORM_SIZE_CTX 3
// structures that will be declared somewhere else
struct storable_picture;
struct datapartition;
struct syntaxelement;
//! CABAC context models for macroblock-type, motion and QP-delta syntax elements.
//! NOTE(review): the meaning of each leading array dimension (3 or 2) is not visible here.
typedef struct
{
BiContextType mb_type_contexts [3][NUM_MB_TYPE_CTX];
BiContextType b8_type_contexts [2][NUM_B8_TYPE_CTX];
BiContextType mv_res_contexts [2][NUM_MV_RES_CTX];
BiContextType ref_no_contexts [2][NUM_REF_NO_CTX];
BiContextType delta_qp_contexts[NUM_DELTA_QP_CTX];
BiContextType mb_aff_contexts [NUM_MB_AFF_CTX];
} MotionInfoContexts;
// Number of context models per syntax element; these size the arrays in
// TextureInfoContexts below.
#define NUM_IPR_CTX 2
#define NUM_CIPR_CTX 4
#define NUM_CBP_CTX 4
#define NUM_BCBP_CTX 4
#define NUM_MAP_CTX 15
#define NUM_LAST_CTX 15
#define NUM_ONE_CTX 5
#define NUM_ABS_CTX 5
//! CABAC context models for intra-prediction, CBP and residual-coefficient syntax elements.
//! NUM_BLOCK_TYPES and NUM_TRANSFORM_SIZE_CTX are defined outside this block.
typedef struct
{
BiContextType transform_size_contexts [NUM_TRANSFORM_SIZE_CTX];
BiContextType ipr_contexts [NUM_IPR_CTX];
BiContextType cipr_contexts[NUM_CIPR_CTX];
BiContextType cbp_contexts [3][NUM_CBP_CTX];
BiContextType bcbp_contexts[NUM_BLOCK_TYPES][NUM_BCBP_CTX];
BiContextType map_contexts [2][NUM_BLOCK_TYPES][NUM_MAP_CTX+1]; // +1 for better alignment
BiContextType last_contexts[2][NUM_BLOCK_TYPES][NUM_LAST_CTX+1]; // +1 for better alignment
BiContextType one_contexts [NUM_BLOCK_TYPES][NUM_ONE_CTX];
BiContextType abs_contexts [NUM_BLOCK_TYPES][NUM_ABS_CTX];
} TextureInfoContexts;
//*********************** end of data type definition for CABAC *******************
/***********************************************************************
* N e w D a t a t y p e s f o r T M L
***********************************************************************
*/
/*! Buffer structure for decoded reference picture marking commands.
 *  Each node holds one command and its operands; nodes are chained through Next
 *  as a singly linked list.
 */
typedef struct DecRefPicMarking_s
{
int memory_management_control_operation; //!< the command code of this node
int difference_of_pic_nums_minus1; //!< operand fields; which ones are meaningful presumably depends on the command code -- TODO confirm against the parser
int long_term_pic_num;
int long_term_frame_idx;
int max_long_term_frame_idx_plus1;
struct DecRefPicMarking_s *Next; //!< next command in the list
} DecRefPicMarking_t;
//! definition of pic motion parameters
//! NOTE(review): the bracketed index lists in the field comments do not match the scalar fields declared here; they look like leftovers from an array-based layout -- confirm.
typedef struct pic_motion_params2
{
h264_ref_t ref_pic_id; //!< reference picture identifier [list][subblock_y][subblock_x]
h264_ref_t ref_id; //!< reference picture identifier [list][subblock_y][subblock_x]
short mv[2]; //!< motion vector [list][subblock_x][subblock_y][component]
char ref_idx; //!< reference picture [list][subblock_y][subblock_x]
byte mb_field; //!< field macroblock indicator
byte field_frame; //!< indicates if co_located is field or frame.
} PicMotionParams2;
//! Macroblock
/*!
 * Per-macroblock decoder state: back-pointers to the owning slice, video and
 * input structures, position fields, QP values, parsed syntax elements,
 * neighbour addresses/availability, and four function pointers that are
 * stored per macroblock.
 * NOTE(review): fields without an existing comment are described from their
 * names only -- confirm against the code that fills them.
 */
typedef struct macroblock
{
struct slice *p_Slice; //!< pointer to the current slice
struct img_par *p_Vid; //!< pointer to VideoParameters
struct inp_par *p_Inp;
int mbAddrX; //!< current MB address
// position fields; presumably macroblock, 4x4-block, luma-pixel and chroma-pixel units respectively -- TODO confirm
int mb_x;
int mb_y;
int block_x;
int block_y;
int block_y_aff;
int pix_x;
int pix_y;
int pix_c_x;
int pix_c_y;
int subblock_x;
int subblock_y;
int qp; //!< QP luma
int qpc[2]; //!< QP chroma
int qp_scaled[MAX_PLANE]; //!< QP scaled for all comps.
Boolean is_lossless;
Boolean is_intra_block;
Boolean is_v_block;
short slice_nr;
short delta_quant; //!< for rate control
struct macroblock *mb_up; //!< pointer to neighboring MB (CABAC)
struct macroblock *mb_left; //!< pointer to neighboring MB (CABAC)
// some storage of macroblock syntax elements for global access
int mb_type;
short mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2]; //!< indices correspond to [forw,backw][block_y][block_x][x,y]
int cbp;
int64 cbp_blk [3];
int64 cbp_bits [3];
int64 cbp_bits_8x8[3];
int i16mode;
char b8mode[4];
char b8pdir[4];
char ei_flag; //!< error indicator flag that enables concealment
char dpl_flag; //!< error indicator flag that signals a missing data partition
char ipmode_DPCM;
// deblocking-filter parameters; the slice struct carries fields with the same names
short DFDisableIdc;
short DFAlphaC0Offset;
short DFBetaOffset;
char c_ipred_mode; //!< chroma intra prediction mode
Boolean mb_field;
int skip_flag;
// neighbour macroblock addresses and their availability flags
int mb_addr_left, mb_addr_up, mb_addr_upper_right, mb_addr_upper_left;
Boolean mb_avail_left, mb_avail_up, mb_avail_upper_right, mb_avail_upper_left;
Boolean luma_transform_size_8x8_flag;
Boolean NoMbPartLessThan8x8Flag;
// function pointers stored per macroblock; they are assigned elsewhere
void (*itrans_8x8)(struct macroblock *currMB, ColorPlane pl, int ioff, int joff);
void (*GetMVPredictor) (struct macroblock *currMB, PixelPos *block,
short pmv[2], short ref_frame, struct pic_motion **motion, int mb_x, int mb_y, int blockshape_x, int blockshape_y);
int (*read_and_store_CBP_block_bit) (struct macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type);
char (*readRefPictureIdx) (struct syntaxelement *currSE, struct datapartition *dP, int list);
} Macroblock;
//! Syntaxelement
//! Holds the decoded value(s) of one syntax element plus the callback that maps a CAVLC code to those values.
typedef struct syntaxelement
{
int value1; //!< numerical value of syntax element
int value2; //!< for blocked symbols, e.g. run/level
int len; //!< length of code
//int inf; //!< info part of CAVLC code
// The trace buffer (and the TRACESTRING_SIZE macro) exist only when TRACE is non-zero,
// so the struct layout differs between trace and non-trace builds.
#if TRACE
#define TRACESTRING_SIZE 100 //!< size of trace string
char tracestring[TRACESTRING_SIZE]; //!< trace string
#endif
//! for mapping of CAVLC to syntaxElement
void (*mapping)(int len, int info, int *value1, int *value2);
} SyntaxElement;
//! Bitstream
//! Read state for one coded buffer; separate position/length pairs are kept for CABAC and CAVLC decoding.
typedef struct
{
// CABAC Decoding
int read_len; //!< actual position in the codebuffer, CABAC only
int code_len; //!< overall codebuffer length, CABAC only
// CAVLC Decoding
int frame_bitoffset; //!< actual position in the codebuffer, bit-oriented, CAVLC only
int bitstream_length; //!< overall codebuffer length, byte oriented, CAVLC only
byte *streamBuffer; //!< actual codebuffer for read bytes
} Bitstream;
/* Fixed-size 2-D array typedefs. Each size has a "row" type (one row of
 * elements) and a block type (an array of rows), so a block is indexed
 * [row][column]. BLOCK_SIZE, BLOCK_SIZE_8x8, MB_BLOCK_SIZE and imgpel are
 * defined outside this header section. */
/* === 4x4 block typedefs === */
// 32 bit precision
typedef int h264_int_block_row_t[BLOCK_SIZE];
typedef h264_int_block_row_t h264_int_block_t[BLOCK_SIZE];
// 16 bit precision
typedef int16_t h264_short_block_row_t[BLOCK_SIZE];
typedef h264_short_block_row_t h264_short_block_t[BLOCK_SIZE];
// 8 bit precision (no imgpel 4x4 typedef is defined here)
/* === 8x8 block typedefs === */
// 32 bit precision
typedef int h264_int_8x8block_row_t[BLOCK_SIZE_8x8];
typedef h264_int_8x8block_row_t h264_int_8x8block_t[BLOCK_SIZE_8x8];
// 16 bit precision
typedef int16_t h264_short_8x8block_row_t[BLOCK_SIZE_8x8];
typedef h264_short_8x8block_row_t h264_short_8x8block_t[BLOCK_SIZE_8x8];
// 8 bit precision (element type is imgpel; its actual width depends on how imgpel is defined)
typedef imgpel h264_imgpel_8x8block_row_t[BLOCK_SIZE_8x8];
typedef h264_imgpel_8x8block_row_t h264_imgpel_8x8block_t[BLOCK_SIZE_8x8];
/* === 16x16 block typedefs === */
// 32 bit precision
typedef int h264_int_macroblock_row_t[MB_BLOCK_SIZE];
typedef h264_int_macroblock_row_t h264_int_macroblock_t[MB_BLOCK_SIZE];
// 16 bit precision
typedef int16_t h264_short_macroblock_row_t[MB_BLOCK_SIZE];
typedef h264_short_macroblock_row_t h264_short_macroblock_t[MB_BLOCK_SIZE];
// 8 bit precision (element type is imgpel; its actual width depends on how imgpel is defined)
typedef imgpel h264_imgpel_macroblock_row_t[MB_BLOCK_SIZE];
typedef h264_imgpel_macroblock_row_t h264_imgpel_macroblock_t[MB_BLOCK_SIZE];
// pair of ints; presumably an (x, y) picture position -- TODO confirm the order at use sites
typedef int h264_pic_position[2];
// one byte per 4x4 block position
typedef byte h264_4x4_byte[BLOCK_SIZE][BLOCK_SIZE];
// three h264_4x4_byte tables; presumably one per colour plane -- TODO confirm
typedef h264_4x4_byte h264_nz_coefficient[3];
//! DataPartition
//! Pairs a bitstream buffer with the CABAC decoding-engine state used to read it.
typedef struct datapartition
{
Bitstream *bitstream; //!< buffer and read positions for this partition
DecodingEnvironment de_cabac; //!< arithmetic decoder state, stored by value
} DataPartition;
//! Slice
/*!
 * Per-slice decoder state: active parameter sets, slice-header values,
 * data partitions and CABAC contexts, reference-list reordering commands,
 * macroblock-sized prediction/reconstruction/coefficient buffers, scaling
 * matrices, weighted-prediction tables, and function pointers stored per
 * slice.
 * NOTE(review): fields without an existing comment are described from their
 * names only -- confirm against the code that fills them.
 */
typedef struct slice
{
struct img_par *p_Vid;
struct inp_par *p_Inp;
pic_parameter_set_rbsp_t *active_pps;
seq_parameter_set_rbsp_t *active_sps;
struct colocated_params *p_colocated;
struct colocated_params *Co_located_JV[MAX_PLANE]; //!< p_colocated to be used during 4:4:4 independent mode decoding
int mb_aff_frame_flag;
int direct_spatial_mv_pred_flag; //!< Indicator for direct mode type (1 for Spatial, 0 for Temporal)
int num_ref_idx_l0_active; //!< number of available list 0 references
int num_ref_idx_l1_active; //!< number of available list 1 references
int qp;
int slice_qp_delta;
int qs;
int slice_qs_delta;
int slice_type; //!< slice type
int model_number; //!< cabac model number
PictureStructure structure; //!< Identify picture structure type
int start_mb_nr; //!< MUST be set by NAL even in case of ei_flag == 1
int max_part_nr;
int dp_mode; //!< data partitioning mode
int last_dquant;
// int last_mb_nr; //!< only valid when entropy coding == CABAC
DataPartition *partArr; //!< array of partitions
MotionInfoContexts *mot_ctx; //!< pointer to struct of context models for use in CABAC
TextureInfoContexts *tex_ctx; //!< pointer to struct of context models for use in CABAC
int mvscale[6][MAX_REFERENCE_PICTURES];
// reference picture list reordering commands, one set per list (l0 / l1)
int ref_pic_list_reordering_flag_l0;
int *reordering_of_pic_nums_idc_l0;
int *abs_diff_pic_num_minus1_l0;
int *long_term_pic_idx_l0;
int ref_pic_list_reordering_flag_l1;
int *reordering_of_pic_nums_idc_l1;
int *abs_diff_pic_num_minus1_l1;
int *long_term_pic_idx_l1;
short DFDisableIdc; //!< Disable deblocking filter on slice
short DFAlphaC0Offset; //!< Alpha and C0 offset for filtering slice
short DFBetaOffset; //!< Beta offset for filtering slice
int pic_parameter_set_id; //!<the ID of the picture parameter set the slice is referring to
int dpB_NotPresent; //!< non-zero, if data partition B is lost
int dpC_NotPresent; //!< non-zero, if data partition C is lost
// 32-byte aligned macroblock buffers (MSVC __declspec(align)), one per colour plane
__declspec(align(32)) h264_imgpel_macroblock_t mb_pred[MAX_PLANE];
__declspec(align(32)) h264_imgpel_macroblock_t mb_rec[MAX_PLANE];
// Anonymous union: the four members below share the same storage, so only
// one view of the coefficient/residual data is valid at a time.
__declspec(align(32)) union
{
__declspec(align(32)) h264_short_8x8block_t mb_rres8[MAX_PLANE][4];
__declspec(align(32)) h264_short_macroblock_t cof[MAX_PLANE];
__declspec(align(32)) h264_short_block_t cof4[MAX_PLANE][16]; // TODO: get this to work, one of these days
__declspec(align(32)) h264_short_macroblock_t ipcm[MAX_PLANE];
};
int cofu[16];
// Scaling matrix info
int InvLevelScale4x4_Intra[3][6][4][4];
int InvLevelScale4x4_Inter[3][6][4][4];
int InvLevelScale8x8_Intra[3][6][64];
int InvLevelScale8x8_Inter[3][6][64];
int *qmatrix[12];
// Cabac
// TODO: we could optimize coefficient reading by storing the levels/runs instead of coefficients
// NOTE(review): the array below has 64 entries although its comment says "one more for EOB" -- confirm the intended size.
int16_t coeff[64]; // one more for EOB
int coeff_ctr;
int pos;
//weighted prediction
unsigned int apply_weights;
unsigned int luma_log2_weight_denom;
unsigned int chroma_log2_weight_denom;
int wp_weight[2][MAX_REFERENCE_PICTURES][3]; // weight in [list][index][component] order
int wp_offset[6][MAX_REFERENCE_PICTURES][3]; // offset in [list][index][component] order
int wbp_weight[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; //weight in [list][fw_index][bw_index][component] order
int wp_round_luma;
int wp_round_chroma;
// function pointers stored per slice; they are assigned elsewhere
void (*read_CBP_and_coeffs_from_NAL) (Macroblock *currMB);
int (*decode_one_component ) (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, struct storable_picture *dec_picture);
int (*readSlice ) (struct img_par *, struct inp_par *);
int (*nal_startcode_follows ) (struct slice*, int );
void (*read_motion_info_from_NAL) (Macroblock *currMB);
void (*read_one_macroblock ) (Macroblock *currMB);
void (*interpret_mb_mode ) (Macroblock *currMB);
void (*compute_colocated ) (struct slice *currSlice, struct colocated_params *p, struct storable_picture **listX[6]);
void (*linfo_cbp_intra) (int len,int info,int *cbp, int *dummy);
void (*linfo_cbp_inter) (int len,int info,int *cbp, int *dummy);
} Slice;
//****************************** ~DM ***********************************
// image parameters
typedef struct img_par
{
struct inp_par *p_Inp;
pic_parameter_set_rbsp_t *active_pps;
seq_parameter_set_rbsp_t *active_sps;
seq_parameter_set_rbsp_t SeqParSet[MAXSPS];
pic_parameter_set_rbsp_t PicParSet[MAXPPS];
struct sei_params *p_SEI;
struct old_slice_par *old_slice;
int number; //!< frame number
unsigned int current_mb_nr; // bitstream order
unsigned int num_dec_mb;
short current_slice_nr;
int *intra_block;
int qp; //!< quant for the current frame
int sp_switch; //!< 1 for switching sp, 0 for normal sp
int type; //!< image type INTER/INTRA
int width;
int height;
int width_cr; //!< width chroma
int height_cr; //!< height chroma
int mb_x;
int mb_y;
int block_x;
int block_y;
int pix_c_x;
int pix_c_y;
int allrefzero;
byte **ipredmode; //!< prediction type [90][74]
h264_nz_coefficient *nz_coeff;
int **siblock;
int cod_counter; //!< Current count of number of skipped macroblocks in a row
int structure; //!< Identify picture structure type
Slice *currentSlice; //!< pointer to current Slice data struct
Macroblock *mb_data; //!< array containing all MBs of a whole frame
Macroblock *mb_data_JV[MAX_PLANE]; //!< mb_data to be used for 4:4:4 independent mode
int colour_plane_id; //!< colour_plane_id of the current coded slice
int ChromaArrayType;
// For MB level frame/field coding
int mb_aff_frame_flag;
// for signalling to the neighbour logic that this is a deblocker call
int DeblockCall;
byte mixedModeEdgeFlag;
// picture error concealment
// concealment_head points to first node in list, concealment_end points to
// last node in list. Initialize both to NULL, meaning no nodes in list yet
struct concealment_node *concealment_head;
struct concealment_node *concealment_end;
DecRefPicMarking_t *dec_ref_pic_marking_buffer; //!< stores the memory management control operations
int num_ref_idx_l0_active; //!< number of forward reference
int num_ref_idx_l1_active; //!< number of backward reference
int slice_group_change_cycle;
int redundant_pic_cnt;
unsigned int pre_frame_num; //!< store the frame_num in the last decoded slice. For detecting gap in frame_num.
int non_conforming_stream;
// End JVT-D101
// POC200301: from unsigned int to int
int toppoc; //poc for this top field // POC200301
int bottompoc; //poc of bottom field of frame
int framepoc; //poc of this frame // POC200301
unsigned int frame_num; //frame_num for this frame
unsigned int field_pic_flag;
byte bottom_field_flag;
//the following is for slice header syntax elements of poc
// for poc mode 0.
unsigned int pic_order_cnt_lsb;
int delta_pic_order_cnt_bottom;
// for poc mode 1.
int delta_pic_order_cnt[3];
// ////////////////////////
// for POC mode 0:
signed int PrevPicOrderCntMsb;
unsigned int PrevPicOrderCntLsb;
signed int PicOrderCntMsb;
// for POC mode 1:
unsigned int AbsFrameNum;
signed int ExpectedPicOrderCnt, PicOrderCntCycleCnt, FrameNumInPicOrderCntCycle;
unsigned int PreviousFrameNum, FrameNumOffset;
int ExpectedDeltaPerPicOrderCntCycle;
int PreviousPOC, ThisPOC;
int PreviousFrameNumOffset;
// /////////////////////////
int idr_flag;
int nal_reference_idc; //!< nal_reference_idc from NAL unit
int idr_pic_id;
int MaxFrameNum;
unsigned int PicWidthInMbs;
unsigned int PicHeightInMapUnits;
unsigned int FrameHeightInMbs;
unsigned int PicHeightInMbs;
unsigned int PicSizeInMbs;
unsigned int FrameSizeInMbs;
unsigned int oldFrameSizeInMbs;
int no_output_of_prior_pics_flag;
int long_term_reference_flag;
int adaptive_ref_pic_buffering_flag;
int last_has_mmco_5;
int last_pic_bottom_field;
// Fidelity Range Extensions Stuff
short bitdepth_luma;
short bitdepth_chroma;
int bitdepth_scale[2];
int bitdepth_luma_qp_scale;
int bitdepth_chroma_qp_scale;
unsigned int dc_pred_value_comp[MAX_PLANE]; //!< component value for DC prediction (depends on component pel bit depth)
int max_pel_value_comp[MAX_PLANE]; //!< max value that one picture element (pixel) can take (depends on pic_unit_bitdepth)
int Transform8x8Mode;
int profile_idc;
int yuv_format;
int lossless_qpprime_flag;
int num_blk8x8_uv;
int num_uv_blocks;
int num_cdc_coeff;
int mb_cr_size_x;
int mb_cr_size_y;
int mb_cr_size_x_blk;
int mb_cr_size_y_blk;
int mb_size[3][2]; //!< component macroblock dimensions
int mb_size_blk[3][2]; //!< component macroblock dimensions
int mb_size_shift[3][2];
int subpel_x;
int subpel_y;
int shiftpel_x;
int shiftpel_y;
int max_vmv_r; //!< maximum vertical motion vector range in luma quarter frame pixel units for the current level_idc
int max_mb_vmv_r; //!< maximum vertical motion vector range in luma quarter pixel units for the current level_idc
// picture error concealment
int last_ref_pic_poc;
int ref_poc_gap;
int poc_gap;
int earlier_missing_poc;
unsigned int frame_to_conceal;
int IDR_concealment_flag;
int conceal_slice_type;
// random access point decoding
int recovery_point;
int recovery_point_found;
int recovery_frame_cnt;
int recovery_frame_num;
int recovery_poc;
int separate_colour_plane_flag;
int frame_number;
int init_bl_done;
// Redundant slices. Should be moved to another structure and allocated only if extended profile
unsigned int previous_frame_num; //!< frame number of previous slice
int ref_flag[17]; //!< 0: i-th previous frame is incorrect
//!< non-zero: i-th previous frame is correct
int Is_primary_correct; //!< if primary frame is correct, 0: incorrect
int Is_redundant_correct; //!< if redundant frame is correct, 0:incorrect
int redundant_slice_ref_idx; //!< reference index of redundant slice
//FILE *p_log; //!< SNR file
int LastAccessUnitExists;
int NALUCount;
Boolean global_init_done;
int *qp_per_matrix;
int *qp_rem_matrix;
struct frame_store *last_out_fs;
int pocs_in_dpb[100];
struct storable_picture *dec_picture;
struct storable_picture *dec_picture_JV[MAX_PLANE]; //!< dec_picture to be used during 4:4:4 independent mode decoding
struct storable_picture *no_reference_picture; //!< dummy storable picture for recovery point
struct storable_picture **listX[6];
// Error parameters
struct object_buffer *erc_object_list;
struct ercVariables_s *erc_errorVar;
int erc_mvperMB;
struct img_par *erc_img;
int ec_flag[SE_MAX_ELEMENTS]; //!< array to set errorconcealment
struct memory_input_struct *mem_input;
struct frame_store *out_buffer;
struct storable_picture *pending_output;
int pending_output_state;
int recovery_flag;
// dpb
struct decoded_picture_buffer *p_Dpb;
char listXsize[6];
// report
char cslice_type[9];
// FMO
int *MbToSliceGroupMap;
int *MapUnitToSliceGroupMap;
int NumberOfSliceGroups; // the number of slice groups -1 (0 == scan order, 7 == maximum)
#if (ENABLE_OUTPUT_TONEMAPPING)
struct tone_mapping_struct_s *seiToneMapping;
#endif
// benski> buffer of storable pictures ready for output.
// might be able to optimize a tad by making a ring buffer, but I doubt it matters
struct storable_picture **out_pictures;
size_t size_out_pictures;
size_t num_out_pictures;
ImageCache image_cache[2]; // [0] is luma [1] is chroma (shared for both planes)
MotionCache motion_cache;
h264_pic_position *PicPos; //! Helper array to access macroblock positions.
NALU_t *nalu; // a cache so we don't re-alloc every time
void (*getNeighbour) (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
void (*getNeighbourPX_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
void (*getNeighbourXP_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
void (*getNeighbourLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
void (*getNeighbourPXLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
void (*getNeighbourXPLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
void (*getNeighbourLeftLuma)(const Macroblock *currMB, PixelPos *pix);
void (*getNeighbourNXLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN<0, yN full range
void (*getNeighbourLeft) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN<0, yN=0
void (*getNeighbourUp) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN=0, yN<0
void (*getNeighbourNX) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN full range
void (*getNeighbourNP) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0
void (*getNeighbourNPChromaNB)(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0
void (*getNeighbour0X) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN=0, yN full range
void (*getNeighbour0XLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN=0, yN full range
void (*getNeighbourX0) (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix); // xN full range, yN = 0
void (*getNeighbourUpLuma) (const Macroblock *currMB, PixelPos *pix); // xN=0, yN<0
void (*getNeighbourNPLumaNB)(const Macroblock *currMB, int yN, PixelPos *pix);
void (*getNeighbourPXLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
void (*getNeighbourPXLumaNB_NoPos)(const Macroblock *currMB, int yN, PixelPos *pix);
void (*getNeighbourPPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
void (*getNeighbourXPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
void (*getNeighbourXPLumaNB_NoPos)(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
void (*get_mb_block_pos) (const h264_pic_position *PicPos, int mb_addr, short *x, short *y);
void (*GetStrength) (byte Strength[16], Macroblock *MbQ, int dir,int edge, int mvlimit, struct storable_picture *p);
void (*EdgeLoopLuma) (ColorPlane pl, struct video_image *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, struct storable_picture *p);
void (*EdgeLoopChroma) (struct video_image *image, byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, struct storable_picture *p);
} VideoParameters;
/*!
 * Input parameters (per the original note: taken from the configuration file).
 *
 * Member order and types are part of the structure layout and must not be
 * rearranged. The HRD / leaky-bucket members only exist when _LEAKYBUCKET_
 * is defined, so the size of this structure depends on that macro.
 */
typedef struct inp_par
{
  int intra_profile_deblocking;  //!< Loop filter usage determined by flags and parameters in bitstream

  /* Output sequence format related variables */
  FrameFormat output;            //!< output related information

#if defined(_LEAKYBUCKET_)
  unsigned long R_decoder;       //!< Decoder rate in HRD model
  unsigned long B_decoder;       //!< Decoder buffer size in HRD model
  unsigned long F_decoder;       //!< Decoder initial buffer fullness in HRD model
  char LeakyBucketParamFile[FILE_NAME_SIZE];  //!< Leaky bucket parameter file name buffer
#endif

  /* Picture error concealment.
   * NOTE(review): VideoParameters declares members with the same two names;
   * how the values relate is not visible from this header -- confirm at the
   * point of use. */
  int ref_poc_gap;
  int poc_gap;
} InputParameters;
/*!
 * Set of slice-level syntax element values kept under the "old slice" name.
 * NOTE(review): presumably a snapshot of the previous slice header used to
 * detect the start of a new picture -- confirm against the code that fills
 * and compares it.
 *
 * The spelling "oder" in two member names is part of the existing interface
 * and is intentionally left as is so that existing users keep compiling.
 */
typedef struct old_slice_par
{
  unsigned int field_pic_flag;
  unsigned int frame_num;
  int          nal_ref_idc;
  unsigned int pic_oder_cnt_lsb;           /* sic: "oder" */
  int          delta_pic_oder_cnt_bottom;  /* sic: "oder" */
  int          delta_pic_order_cnt[2];     /* two entries here; VideoParameters::delta_pic_order_cnt has three */
  byte         bottom_field_flag;
  byte         idr_flag;
  int          idr_pic_id;
  int          pps_id;
} OldSliceParams;
/*!
 * Top-level decoder handle: bundles a pointer to the input parameters with a
 * pointer to the video (image) parameters. Only pointers are stored here;
 * ownership and lifetime of the pointed-to objects are not defined by this
 * header.
 */
typedef struct decoder_params
{
  InputParameters *p_Inp;  //!< Input Parameters
  VideoParameters *p_Vid;  //!< Image Parameters
} DecoderParams;
/* Trace support: these globals are only declared when TRACE is defined.
 * They are declarations only; the definitions live in another translation unit. */
#if defined(TRACE)
extern FILE *p_trace;     //!< Trace file
extern int   bitcounter;  /* NOTE(review): presumably a running bit count for trace output -- confirm */
#endif
/*
 * Prototypes.
 *
 * All functions below are defined in other translation units; this header
 * only declares them. Function declarations have external linkage by
 * default, so the explicit storage-class specifier is omitted throughout.
 */

/* Error reporting. NOTE(review): presumably prints `text` and terminates
 * with `code` -- confirm against the definition. */
void error(char *text, int code);

/* Dynamic memory allocation of the global buffers attached to p_Vid. */
int  init_global_buffers(VideoParameters *p_Vid);
void free_global_buffers(VideoParameters *p_Vid);

/* Bitstream payload conversions operating in place on streamBuffer.
 * NOTE(review): the int results presumably are the resulting lengths or
 * positions -- confirm against the definitions. */
int RBSPtoSODB(byte *streamBuffer, int last_byte_pos);
int EBSPtoRBSP(byte *streamBuffer, int end_bytepos);

/* Data partition allocation and release; `n` is passed to both calls. */
void           FreePartition(DataPartition *dp, int n);
DataPartition *AllocPartition(int n);

/* Trace helpers. */
void tracebits(const char *trace_str, int len, int info, int value1);
void tracebits2(const char *trace_str, int len, int info);

/* Ceiling-log2 helpers. NOTE(review): the difference between the plain and
 * the _sf variant is not visible from this header. */
unsigned CeilLog2(unsigned uiVal);
unsigned CeilLog2_sf(unsigned uiVal);

/* For 4:4:4 independent mode */
void change_plane_JV(VideoParameters *p_Vid, int nplane);
void make_frame_picture_JV(VideoParameters *p_Vid);
#endif
|